In [84]:
# Install the third-party packages imported in the next cell (transformer_lens, wandb)
# into the environment of the running kernel.
%pip install transformer_lens
%pip install wandb

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0mNote: you may need to restart the kernel to use updated packages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0mNote: you may need to restart the kernel to use updated packages.


In [85]:
import torch
import numpy as np
from functools import partial
import einops
import random
import pprint
import json
from pathlib import Path
from transformer_lens import HookedTransformer
from transformer_lens.utils import (
    load_dataset,
    tokenize_and_concatenate,
    get_act_name,
    download_file_from_hf,
)
import tqdm
from torch import nn
from torch.nn import functional as F
import wandb
# %%

In [86]:
# Base hyper-parameters for the run. How each key is consumed elsewhere in this notebook:
#   seed            - seeds torch / numpy / random, and is re-applied inside AutoEncoder.__init__
#   batch_size      - number of activation rows returned by each Buffer.next() call
#   buffer_mult     - buffer_size = batch_size * buffer_mult
#   lr, beta1/beta2 - Adam learning rate and betas
#   num_tokens      - number of training steps = num_tokens // batch_size
#   l1_coeff        - multiplier of the L1 sparsity term in AutoEncoder.forward
#   dict_mult       - dict_size = act_size * dict_mult
#   seq_len         - used to derive model_batch_size and buffer_batches
#   enc_dtype       - key into the DTYPES table
#   remove_rare_dir - when true, the training cell loads "rare_freq_dir.pt"
#   model_name      - name passed to HookedTransformer.from_pretrained
#   site, layer     - passed to get_act_name to build the hook name
#   device          - device the model, buffer and autoencoder are placed on
default_cfg = {
    "seed": 43,
    "batch_size": 4096,
    "buffer_mult": 384,
    "lr": 1e-4,
    "num_tokens": int(2e9), #5525253
    "l1_coeff": 3e-4,
    "beta1": 0.9,
    "beta2": 0.99,
    "dict_mult": 8,
    "seq_len": 1024, #1024,
    "enc_dtype":"fp32",
    "remove_rare_dir": False,
    "model_name": "gelu-1l",
    "site": "mlp_out",
    "layer": 0,
    "device": "cuda"
}
    
# Width of the activation vector at each supported hook site; post_init_cfg looks up
# cfg["act_size"] here using cfg["site"].
site_to_size = {
    "mlp_out": 512,
    "post": 2048,
    "resid_pre": 512,
    "resid_mid": 512,
    "resid_post": 512,
}

# %%
def post_init_cfg(cfg):
    """Add the derived settings to ``cfg`` in place; returns None.

    Keys written, in this order: ``model_batch_size``, ``buffer_size``,
    ``buffer_batches``, ``act_name``, ``act_size``, ``dict_size``, ``name``.
    They are computed only from the base keys, so calling this again on the
    same dict reproduces the same values.
    """
    seqs_per_train_batch = cfg["batch_size"] // cfg["seq_len"]
    cfg["model_batch_size"] = seqs_per_train_batch * 16

    buffer_rows = cfg["batch_size"] * cfg["buffer_mult"]
    cfg["buffer_size"] = buffer_rows
    cfg["buffer_batches"] = buffer_rows // cfg["seq_len"]

    hook_site = cfg["site"]
    cfg["act_name"] = get_act_name(hook_site, cfg["layer"])

    act_width = site_to_size[hook_site]
    cfg["act_size"] = act_width
    cfg["dict_size"] = act_width * cfg["dict_mult"]

    cfg["name"] = f"{cfg['model_name']}_{cfg['layer']}_{cfg['dict_size']}_{cfg['site']}"
# `cfg` is the same dict object as `default_cfg` (no copy is made), so the derived
# entries added by post_init_cfg are visible through both names.
# The derivation and the pretty-print are done exactly once: post_init_cfg only reads
# the base keys, so a second call would recompute identical values and just repeat the output.
cfg = default_cfg
post_init_cfg(cfg)
pprint.pprint(cfg)
# %%

{'act_name': 'blocks.0.hook_mlp_out',
 'act_size': 512,
 'batch_size': 4096,
 'beta1': 0.9,
 'beta2': 0.99,
 'buffer_batches': 1536,
 'buffer_mult': 384,
 'buffer_size': 1572864,
 'device': 'cuda',
 'dict_mult': 8,
 'dict_size': 4096,
 'enc_dtype': 'fp32',
 'l1_coeff': 0.0003,
 'layer': 0,
 'lr': 0.0001,
 'model_batch_size': 64,
 'model_name': 'gelu-1l',
 'name': 'gelu-1l_0_4096_mlp_out',
 'num_tokens': 2000000000,
 'remove_rare_dir': False,
 'seed': 43,
 'seq_len': 1024,
 'site': 'mlp_out'}
{'act_name': 'blocks.0.hook_mlp_out',
 'act_size': 512,
 'batch_size': 4096,
 'beta1': 0.9,
 'beta2': 0.99,
 'buffer_batches': 1536,
 'buffer_mult': 384,
 'buffer_size': 1572864,
 'device': 'cuda',
 'dict_mult': 8,
 'dict_size': 4096,
 'enc_dtype': 'fp32',
 'l1_coeff': 0.0003,
 'layer': 0,
 'lr': 0.0001,
 'model_batch_size': 64,
 'model_name': 'gelu-1l',
 'name': 'gelu-1l_0_4096_mlp_out',
 'num_tokens': 2000000000,
 'remove_rare_dir': False,
 'seed': 43,
 'seq_len': 1024,
 'site': 'mlp_out'}


In [87]:
# Seed every random number generator used in this notebook from the configured seed.
SEED = cfg["seed"]
# Keep the object returned by torch.manual_seed; get_acts passes it to torch.randperm
# as its `generator` argument.
GENERATOR = torch.manual_seed(SEED)
# Maps the string stored under cfg["enc_dtype"] to the corresponding torch dtype.
DTYPES = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
np.random.seed(SEED)
random.seed(SEED)
# Explicitly turn gradient tracking on globally before any training code runs.
torch.set_grad_enabled(True)

# Load the pretrained model named in cfg, cast it to the configured dtype and move it
# to the configured device.
model = HookedTransformer.from_pretrained(cfg["model_name"]).to(DTYPES[cfg["enc_dtype"]]).to(cfg["device"])

# from parallel import DataParallelModel, DataParallelCriterion
# model = torch.nn.DataParallel(raw_model)

# n_layers = model.cfg.n_layers
# d_model = model.cfg.d_model
# n_heads = model.cfg.n_heads
# d_head = model.cfg.d_head
# d_mlp = model.cfg.d_mlp
# d_vocab = model.cfg.d_vocab
# Cell output: the device of the model's first parameter, as a check of where the model lives.
next(model.parameters()).device

Loaded pretrained model gelu-1l into HookedTransformer
Changing model dtype to torch.float32
Moving model to device:  cuda


device(type='cuda', index=0)

In [88]:
# %%
@torch.no_grad()
def get_acts(tokens, batch_size=1024):
    """Run the model on ``tokens`` and return activations at the configured hook.

    The cached activation ``cfg["act_name"]`` is flattened so that every leading
    dimension is merged into one row axis. Returns ``(subsampled, flat)`` where
    ``flat`` holds all rows and ``subsampled`` holds at most ``batch_size`` of
    them, chosen with ``torch.randperm`` driven by the module-level ``GENERATOR``.
    """
    print(f"{tokens.get_device()=}")
    hook_name = cfg["act_name"]
    _, cache = model.run_with_cache(tokens, stop_at_layer=cfg["layer"]+1, names_filter=hook_name)
    flat_acts = cache[hook_name]
    flat_acts = flat_acts.reshape(-1, flat_acts.shape[-1])
    chosen_rows = torch.randperm(flat_acts.shape[0], generator=GENERATOR)[:batch_size]
    return flat_acts[chosen_rows, :], flat_acts
# Smoke test: feed a tiny (2, 10) batch of token ids and ask for 3 subsampled rows;
# the cell output shows the shapes of the subsample and of the full flattened activations.
sub, acts = get_acts(torch.arange(20).reshape(2, 10), batch_size=3)
sub.shape, acts.shape

tokens.get_device()=-1


(torch.Size([3, 512]), torch.Size([20, 512]))

In [89]:
# %%
SAVE_DIR = Path("cache")
class AutoEncoder(nn.Module):
    """Sparse autoencoder with an overcomplete ReLU hidden layer.

    Parameters (names are part of the checkpoint format):
        W_enc: (act_size, dict_size) encoder weights.
        W_dec: (dict_size, act_size) decoder weights.
        b_enc: (dict_size,) encoder bias.
        b_dec: (act_size,) decoder bias, also subtracted from the input before encoding.

    Config keys read from ``cfg``: ``dict_size``, ``l1_coeff``, ``enc_dtype`` (looked up in
    the module-level ``DTYPES`` table), ``seed``, ``act_size``, ``device`` and, optionally,
    ``ablate_feature_idx`` (index of one hidden unit forced to zero in ``forward``;
    defaults to ``DEFAULT_ABLATED_FEATURE``; ``None`` disables the ablation).

    Checkpoints are stored in ``SAVE_DIR`` as ``<version>.pt`` plus ``<version>_cfg.json``.
    """

    # Hidden unit zeroed by forward() when cfg has no "ablate_feature_idx" entry.
    DEFAULT_ABLATED_FEATURE = 240

    def __init__(self, cfg, old_ae=None):
        """Build the autoencoder, optionally warm-started from a smaller one.

        Args:
            cfg: configuration dict (see class docstring). A shallow copy is kept on
                ``self.cfg`` so that ``save`` writes the config this instance was built with.
            old_ae: optional previously trained autoencoder. Its weights are copied into the
                leading hidden units of the new (at least as wide) dictionary; the remaining
                units keep their fresh initialisation. The decoder is NOT re-normalised
                in this branch.

        Side effect: re-seeds torch's global RNG with ``cfg["seed"]``.
        """
        super().__init__()
        d_hidden = cfg["dict_size"]
        l1_coeff = cfg["l1_coeff"]
        dtype = DTYPES[cfg["enc_dtype"]]
        device = cfg["device"]
        torch.manual_seed(cfg["seed"])

        if old_ae is not None:
            # Build the warm-started tensors without autograd so the new parameters carry no
            # history from old_ae and never share storage with it.
            with torch.no_grad():
                wenc = torch.nn.init.kaiming_uniform_(torch.empty(cfg["act_size"], d_hidden, dtype=dtype, device=device))
                wenc[:, :old_ae.W_enc.shape[1]] = old_ae.W_enc

                wdec = torch.nn.init.kaiming_uniform_(torch.empty(d_hidden, cfg["act_size"], dtype=dtype, device=device))
                wdec[:old_ae.W_dec.shape[0], :] = old_ae.W_dec

                benc = torch.zeros(d_hidden, dtype=dtype, device=device)
                benc[:old_ae.b_enc.shape[0]] = old_ae.b_enc

                # Clone: wrapping old_ae.b_dec directly would alias its storage, so training
                # this model would silently modify old_ae as well.
                bdec = old_ae.b_dec.detach().clone().to(device=device, dtype=dtype)

            self.W_enc = nn.Parameter(wenc)
            self.W_dec = nn.Parameter(wdec)
            self.b_enc = nn.Parameter(benc)
            self.b_dec = nn.Parameter(bdec)

            #TODO may need to normalize
            # self.W_dec.data[:] = self.W_dec / self.W_dec.norm(dim=-1, keepdim=True)
            print("~~~~~~~~~~~ INIT AND NOOOO NORMALIZED!")
        else:
            self.W_enc = nn.Parameter(torch.nn.init.kaiming_uniform_(torch.empty(cfg["act_size"], d_hidden, dtype=dtype, device=device)))
            self.W_dec = nn.Parameter(torch.nn.init.kaiming_uniform_(torch.empty(d_hidden, cfg["act_size"], dtype=dtype, device=device)))
            self.b_enc = nn.Parameter(torch.zeros(d_hidden, dtype=dtype, device=device))
            self.b_dec = nn.Parameter(torch.zeros(cfg["act_size"], dtype=dtype, device=device))

            # Start with unit-norm decoder rows.
            self.W_dec.data[:] = self.W_dec / self.W_dec.norm(dim=-1, keepdim=True)

        self.cfg = dict(cfg)
        self.d_hidden = d_hidden
        self.l1_coeff = l1_coeff
        self.ablate_feature_idx = cfg.get("ablate_feature_idx", self.DEFAULT_ABLATED_FEATURE)

        self.to(device)

    def forward(self, x):
        """Encode and reconstruct ``x`` (last dimension must be ``act_size``).

        Returns:
            ``(loss, x_reconstruct, acts, l2_loss, l1_loss)`` where
            ``l2_loss`` is the squared error summed over the last dim and averaged over dim 0,
            ``l1_loss`` is ``l1_coeff`` times the sum of ``|acts|`` over ALL elements
            (it is not averaged over the batch), and ``loss`` is their sum.

        Raises:
            IndexError: if ``ablate_feature_idx`` is not a valid hidden-unit index.
        """
        x_cent = x - self.b_dec
        acts = F.relu(x_cent @ self.W_enc + self.b_enc)
        if self.ablate_feature_idx is not None:
            # Zero one hidden unit on a copy so the ReLU output itself is not modified in place.
            acts = acts.clone()
            acts[..., self.ablate_feature_idx] = 0
        x_reconstruct = acts @ self.W_dec + self.b_dec
        l2_loss = (x_reconstruct.float() - x.float()).pow(2).sum(-1).mean(0)
        l1_loss = self.l1_coeff * (acts.float().abs().sum())
        loss = l2_loss + l1_loss
        return loss, x_reconstruct, acts, l2_loss, l1_loss

    @torch.no_grad()
    def make_decoder_weights_and_grad_unit_norm(self):
        """Project the decoder gradient off each row's direction and renormalise the rows.

        Must be called after ``backward()`` (it reads ``W_dec.grad``) and before the
        optimizer step.
        """
        W_dec_normed = self.W_dec / self.W_dec.norm(dim=-1, keepdim=True)
        # Component of the gradient that is parallel to each (unit) decoder row.
        W_dec_grad_proj = (self.W_dec.grad * W_dec_normed).sum(-1, keepdim=True) * W_dec_normed
        self.W_dec.grad -= W_dec_grad_proj
        # Bugfix(?) for ensuring W_dec retains unit norm, this was not there when I trained my original autoencoders.
        self.W_dec.data = W_dec_normed

    def get_version(self):
        """Return the next free checkpoint number in ``SAVE_DIR`` (0 if none exist yet).

        Only files named ``<integer>.pt`` are considered, so unrelated files in the
        directory cannot break the numbering; a missing directory counts as empty.
        """
        if not SAVE_DIR.is_dir():
            return 0
        version_list = [int(path.stem) for path in SAVE_DIR.glob("*.pt") if path.stem.isdecimal()]
        if len(version_list):
            return 1+max(version_list)
        else:
            return 0

    def save(self):
        """Write the weights and this instance's config under the next free version number."""
        SAVE_DIR.mkdir(parents=True, exist_ok=True)
        version = self.get_version()
        torch.save(self.state_dict(), SAVE_DIR/(str(version)+".pt"))
        with open(SAVE_DIR/(str(version)+"_cfg.json"), "w") as f:
            # Dump the config this model was built with, not whatever the global cfg is now.
            json.dump(self.cfg, f)
        print("Saved as version", version)

    @classmethod
    def load(cls, version):
        """Rebuild the autoencoder saved as ``version`` in ``SAVE_DIR`` and load its weights."""
        with open(SAVE_DIR/(str(version)+"_cfg.json"), "r") as f:
            cfg = json.load(f)
        pprint.pprint(cfg)
        self = cls(cfg=cfg)
        # The checkpoint is a plain state dict of tensors, so the restricted unpickler suffices;
        # map_location lets a checkpoint written on another device load onto cfg["device"].
        state_dict = torch.load(SAVE_DIR/(str(version)+".pt"), map_location=cfg["device"], weights_only=True)
        self.load_state_dict(state_dict)
        return self

    @classmethod
    def load_from_hf(cls, version):
        """
        Loads the saved autoencoder from HuggingFace.

        Version is expected to be an int, or "run1" or "run2"

        version 25 is the final checkpoint of the first autoencoder run,
        version 47 is the final checkpoint of the second autoencoder run.
        """
        if version=="run1":
            version = 25
        elif version=="run2":
            version = 47

        cfg = download_file_from_hf("NeelNanda/sparse_autoencoder", f"{version}_cfg.json")
        pprint.pprint(cfg)
        self = cls(cfg=cfg)
        self.load_state_dict(download_file_from_hf("NeelNanda/sparse_autoencoder", f"{version}.pt", force_is_torch=True))
        return self

# %%
def shuffle_data(all_tokens):
    """Return ``all_tokens`` with its rows (dim 0) rearranged by a random permutation."""
    print("Shuffled data")
    n_rows = all_tokens.shape[0]
    row_order = torch.randperm(n_rows)
    return all_tokens[row_order]
    
    # torch.save(all_tokens_reshaped, "data/c4_code_2b_tokens_reshaped_filtered.pt")
# else:
    # data = datasets.load_from_disk("/Users/wenx/Documents/playground/translation/data/c4_code_tokenized_2b.hf")
    # all_tokens = torch.load("data/c4-10k.pt")
    # all_tokens = shuffle_data(all_tokens)


# %%

In [90]:
## FILTER DATA
# encoder = AutoEncoder(cfg, old_ae)

# NOTE(review): DATA_DIR is not referenced by the code visible in this notebook; the
# load/save calls use literal "data/..." paths instead.
DATA_DIR = Path("data/")
# Switch between building the filtered dataset from the raw corpus (True) and reusing
# a filtered file saved by an earlier run (False); the next cell branches on it too.
loading_data_first_time = False
if loading_data_first_time:
    data = load_dataset("NeelNanda/c4-code-tokenized-2b", split="train") # c4-code-tokenized-2b
    # data = tokenize_and_concatenate(data, model.tokenizer, max_length=128)
    data.set_format(type="torch", columns=["tokens"])
    # Moves the complete token tensor onto the configured device.
    all_tokens = data["tokens"].to(cfg["device"])
    print(f"{all_tokens.shape=}")

    # Cut every stored row into 8 consecutive pieces of 128 tokens (the pattern therefore
    # requires rows of exactly 8 * 128 tokens).
    # NOTE(review): these literals are independent of cfg["seq_len"] — confirm this is intended.
    all_tokens_reshaped = einops.rearrange(all_tokens, "batch (x seq_len) -> (batch x) seq_len", x=8, seq_len=128)
    # Overwrite the first token of every piece with the tokenizer's BOS id.
    all_tokens_reshaped[:, 0] = model.tokenizer.bos_token_id
    # Shuffle the pieces along dim 0.
    all_tokens_reshaped = all_tokens_reshaped[torch.randperm(all_tokens_reshaped.shape[0])]
    print(f"{all_tokens_reshaped.shape=}")

    all_tokens = all_tokens_reshaped

    # check_hyphen = all_tokens == 15
    # all_tokens = all_tokens[check_hyphen.sum(-1)>0]
    # print(f"{all_tokens.shape=}")

@torch.inference_mode()
def get_activating_tokens(
    tokens, #: Int[Tensor, "batch seq"],
    model, #: HookedTransformer,
    autoencoder, #: AutoEncoder,
    feature_idx: int,
    autoencoder_B: bool = False,
    threshold: float = 2.0,
    act_name: str = 'blocks.0.hook_mlp_out',
): # -> Int[Tensor, "kept_batch seq"]:
    '''
    Keeps the sequences on which one autoencoder feature responds strongly.

    The model is run on `tokens` and the activation cached at `act_name` is projected
    onto the encoder direction of `feature_idx` after subtracting the decoder bias.
    Note that this is the raw pre-activation: the encoder bias and the ReLU are not applied.
    A sequence is kept when the absolute value of that projection exceeds `threshold`
    at one or more positions.

    Args:
        tokens: 2-D batch of token ids (batch, seq).
        model: object exposing `run_with_cache(tokens, names_filter=...)`.
        autoencoder: object exposing `W_enc` (d_model, n_features) and `b_dec` (d_model,).
        feature_idx: column of `W_enc` to test.
        autoencoder_B: unused; kept so existing call sites remain valid.
        threshold: minimum absolute projection that counts as "activated".
        act_name: name of the cached activation to read.

    Returns:
        The rows of `tokens` that contain at least one activating position
        (possibly zero rows).
    '''
    _, cache = model.run_with_cache(tokens, names_filter=[act_name])
    mlp_out = cache[act_name]

    no_bias = mlp_out - autoencoder.b_dec
    actsf_raw = einops.einsum(autoencoder.W_enc[:, feature_idx], no_bias, "d_model, batch seq d_model -> batch seq")

    has_activated = actsf_raw.abs() > threshold
    keep_sequence = has_activated.any(dim=-1) # True where at least one token in the sequence activates the feature
    return tokens[keep_sequence]



In [91]:
if loading_data_first_time:
    # Filter the raw corpus down to the sequences that activate feature 240 of saved autoencoder 16.
    encoder = AutoEncoder.load(16)
    step_size = 125
    filtered_chunks = []
    # Stepping over start offsets never produces an empty trailing chunk, even when the
    # number of sequences is an exact multiple of step_size.
    for i, range_s in enumerate(range(0, all_tokens.shape[0], step_size)):
        if i % 100 == 0:
            print(i)
        # Periodic checkpoint of everything kept so far (skipped while nothing has been collected).
        if i % 1000 == 0 and filtered_chunks:
            torch.save(torch.concat(filtered_chunks, dim=0), f"data/c4_code_tokenized_2b_filtered_{i}.pt")
        chunk = all_tokens[range_s:range_s + step_size]
        filtered_chunks.append(get_activating_tokens(chunk, model, encoder, 240))
    # Concatenate once at the end instead of growing a tensor on every iteration.
    all_tokens = torch.concat(filtered_chunks, dim=0)
    torch.save(all_tokens, "data/c4_code_tokenized_2b_filtered_7500.pt")
else:
    # Reuse a filtered token tensor written by an earlier run; the file holds a plain tensor,
    # so the restricted (weights_only) unpickler is sufficient.
    all_tokens = torch.load("data/c4_code_tokenized_2b_filtered_1000.pt", weights_only=True)
    all_tokens = shuffle_data(all_tokens)

print(f"{all_tokens.shape=}")

  all_tokens = torch.load("data/c4_code_tokenized_2b_filtered_1000.pt")


Shuffled data
all_tokens.shape=torch.Size([124610, 1024])


In [92]:
# %%
def replacement_hook(mlp_post, hook, encoder):
    """Forward hook: replace the activation with ``encoder``'s reconstruction of it.

    ``encoder`` is called on the activation and element 1 of its result is returned;
    ``hook`` is accepted for the hook signature and not used.
    """
    encoder_outputs = encoder(mlp_post)
    return encoder_outputs[1]

def mean_ablate_hook(mlp_post, hook):
    """Forward hook: overwrite the activation in place with its mean over dims 0 and 1.

    Returns the same (modified) tensor object; ``hook`` is not used.
    """
    mean_over_first_two_dims = mlp_post.mean([0, 1])
    mlp_post[:] = mean_over_first_two_dims
    return mlp_post

def zero_ablate_hook(mlp_post, hook):
    """Forward hook: set every element of the activation to zero in place.

    Returns the same (modified) tensor object; ``hook`` is not used.
    """
    mlp_post[:] = 0.
    return mlp_post

@torch.no_grad()
def get_recons_loss(num_batches=5, local_encoder=None):
    """Measure how much language-model loss is kept when the hooked activation is
    replaced by the autoencoder's reconstruction.

    For ``num_batches`` random batches of ``cfg["model_batch_size"]`` sequences the model
    loss is computed three ways: unmodified, with the activation replaced by the
    reconstruction, and with the activation zeroed. The three losses are averaged over
    the batches and printed, together with the score.

    Args:
        num_batches: number of random batches to average over.
        local_encoder: autoencoder to evaluate; the global ``encoder`` is used when None.

    Returns:
        ``(score, loss, recons_loss, zero_abl_loss)`` as Python floats, where
        ``score = (zero_abl_loss - recons_loss) / (zero_abl_loss - loss)``.
    """
    autoencoder = encoder if local_encoder is None else local_encoder
    splice_in_reconstruction = partial(replacement_hook, encoder=autoencoder)

    per_batch_losses = []
    for _ in range(num_batches):
        picked = torch.randperm(len(all_tokens))[:cfg["model_batch_size"]]
        tokens = all_tokens[picked]
        clean = model(tokens, return_type="loss")
        spliced = model.run_with_hooks(tokens, return_type="loss", fwd_hooks=[(cfg["act_name"], splice_in_reconstruction)])
        zeroed = model.run_with_hooks(tokens, return_type="loss", fwd_hooks=[(cfg["act_name"], zero_ablate_hook)])
        per_batch_losses.append((clean, spliced, zeroed))

    loss, recons_loss, zero_abl_loss = torch.tensor(per_batch_losses).mean(0).tolist()
    print(loss, recons_loss, zero_abl_loss)

    score = ((zero_abl_loss - recons_loss)/(zero_abl_loss - loss))
    print(f"{score:.2%}")
    return score, loss, recons_loss, zero_abl_loss
# print(get_recons_loss())
# %%
# Frequency
@torch.no_grad()
def get_freqs(num_batches=25, local_encoder=None):
    """Estimate how often each hidden unit of the autoencoder is active.

    Runs the model on ``num_batches`` random batches, feeds the flattened hooked
    activations through the autoencoder and counts, per hidden unit, the rows on which
    its activation is strictly positive. Prints the fraction of units that never fired.

    Args:
        num_batches: number of random batches to sample.
        local_encoder: autoencoder to evaluate; the global ``encoder`` is used when None.

    Returns:
        Float32 tensor of length ``d_hidden`` holding each unit's firing frequency.
    """
    autoencoder = encoder if local_encoder is None else local_encoder
    fire_counts = torch.zeros(autoencoder.d_hidden, dtype=torch.float32, device=cfg["device"])
    rows_seen = 0
    for _ in tqdm.trange(num_batches):
        picked = torch.randperm(len(all_tokens))[:cfg["model_batch_size"]]
        tokens = all_tokens[picked]

        _, cache = model.run_with_cache(tokens, stop_at_layer=cfg["layer"]+1, names_filter=cfg["act_name"])
        flat_acts = cache[cfg["act_name"]].reshape(-1, cfg["act_size"])

        # Element 2 of the autoencoder's output tuple is the hidden activation.
        hidden = autoencoder(flat_acts)[2]

        fire_counts += (hidden > 0).sum(0)
        rows_seen += hidden.shape[0]
    fire_counts /= rows_seen
    num_dead = (fire_counts==0).float().mean()
    print("Num dead", num_dead)
    return fire_counts
# %%
@torch.no_grad()
def re_init(indices, encoder):
    """Re-initialise the selected hidden units of ``encoder`` in place.

    Fresh Kaiming-uniform matrices are drawn for the full encoder and decoder shapes
    (encoder first, then decoder); only the columns of ``W_enc`` and rows of ``W_dec``
    selected by ``indices`` are overwritten with them, and the matching ``b_enc``
    entries are set to zero. ``b_dec`` is left untouched. Returns None.
    """
    fresh_W_enc = torch.nn.init.kaiming_uniform_(torch.zeros_like(encoder.W_enc))
    fresh_W_dec = torch.nn.init.kaiming_uniform_(torch.zeros_like(encoder.W_dec))
    fresh_b_enc = torch.zeros_like(encoder.b_enc)
    print(fresh_W_dec.shape, fresh_W_enc.shape, fresh_b_enc.shape)

    every = slice(None)
    replacements = (
        (encoder.W_enc, fresh_W_enc, (every, indices)),
        (encoder.W_dec, fresh_W_dec, (indices, every)),
        (encoder.b_enc, fresh_b_enc, indices),
    )
    for param, fresh, selector in replacements:
        param.data[selector] = fresh[selector]


In [93]:
# %%
class Buffer():
    """
    This defines a data buffer, to store a bunch of MLP acts that can be used to train the autoencoder. It'll automatically run the model to generate more when it gets halfway empty.
    """
    def __init__(self, cfg):
        self.buffer = torch.zeros((cfg["buffer_size"], cfg["act_size"]), dtype=torch.bfloat16, requires_grad=False, device=cfg["device"])
        self.cfg = cfg
        self.token_pointer = 0
        self.first = True
        self.refresh()

    @torch.no_grad()
    def refresh(self):

        self.pointer = 0
        with torch.autocast("cuda", torch.bfloat16):
            if self.first:
                num_batches = self.cfg["buffer_batches"]
            else:
                num_batches = self.cfg["buffer_batches"]//2
            self.first = False
            # print(f"{all_tokens.shape=}")
            # print(f"{num_batches=}")
            # print(self.cfg["model_batch_size"])
            # print(f"{=}")
            for _ in range(0, num_batches, self.cfg["model_batch_size"]):
                tokens = all_tokens[self.token_pointer:self.token_pointer+self.cfg["model_batch_size"]]
                _, cache = model.run_with_cache(tokens, stop_at_layer=cfg["layer"]+1, names_filter=cfg["act_name"])
                acts = cache[cfg["act_name"]].reshape(-1, self.cfg["act_size"])
                # print(f"{tokens.shape=}")
                # print(f"{acts.shape=}")
                # print(f"{self.pointer=}")
                # print(f"{self.buffer.shape=}")
                # print(cache[cfg["act_name"]].shape)
                # print(tokens.shape, acts.shape, self.pointer, self.token_pointer)
                self.buffer[self.pointer: self.pointer+acts.shape[0]] = acts
                self.pointer += acts.shape[0]
                self.token_pointer += self.cfg["model_batch_size"]
                # if self.token_pointer > all_tokens.shape[0] - self.cfg["model_batch_size"]:
                #     self.token_pointer = 0

        self.pointer = 0
        self.buffer = self.buffer[torch.randperm(self.buffer.shape[0])]

    @torch.no_grad()
    def next(self):
        out = self.buffer[self.pointer:self.pointer+self.cfg["batch_size"]]
        self.pointer += self.cfg["batch_size"]
        if self.pointer > self.buffer.shape[0]//2 - self.cfg["batch_size"]:
            # print("Refreshing the buffer!")
            self.refresh()
        return out

In [94]:
# %% TRAIN
# Warm-start a new autoencoder from saved version 16 and fill the activation buffer.
old_ae = AutoEncoder.load(16)
encoder = AutoEncoder(cfg, old_ae)
# encoder = AutoEncoder(cfg)
buffer = Buffer(cfg)
# Code used to remove the "rare freq direction", the shared direction among the ultra low frequency features.
# I experimented with removing it and retraining the autoencoder.
if cfg["remove_rare_dir"]:
    rare_freq_dir = torch.load("rare_freq_dir.pt")
    rare_freq_dir.requires_grad = False

# %%
try:
    wandb.init(project="sae-gelu-1l-mlp-out")
    num_batches = cfg["num_tokens"] // cfg["batch_size"]
    print(f"{num_batches=}")
    # model_num_batches = cfg["model_batch_size"] * num_batches
    encoder_optim = torch.optim.Adam(encoder.parameters(), lr=cfg["lr"], betas=(cfg["beta1"], cfg["beta2"]))
    recons_scores = []
    act_freq_scores_list = []
    # `i` is the true optimisation step and is never rebound, so the logging, checkpoint
    # and neuron-reset schedules below fire at the stated step counts.
    for i in tqdm.trange(num_batches):
        acts = buffer.next()
        loss, x_reconstruct, mid_acts, l2_loss, l1_loss = encoder(acts)
        loss.backward()
        # Must run between backward() and step(): it edits W_dec.grad and renormalises W_dec.
        encoder.make_decoder_weights_and_grad_unit_norm()
        encoder_optim.step()
        encoder_optim.zero_grad()
        loss_dict = {"loss": loss.item(), "l2_loss": l2_loss.item(), "l1_loss": l1_loss.item()}
        del loss, x_reconstruct, mid_acts, l2_loss, l1_loss, acts
        if (i) % 100 == 0:
            wandb.log(loss_dict)
            print(loss_dict)
        if (i) % 1000 == 0:
            x = (get_recons_loss(local_encoder=encoder))
            print("Reconstruction:", x)
            recons_scores.append(x[0])
            freqs = get_freqs(5, local_encoder=encoder)
            act_freq_scores_list.append(freqs)
            # histogram(freqs.log10(), marginal="box", histnorm="percent", title="Frequencies")
            wandb.log({
                "recons_score": x[0],
                "dead": (freqs==0).float().mean().item(),
                "below_1e-6": (freqs<1e-6).float().mean().item(),
                "below_1e-5": (freqs<1e-5).float().mean().item(),
            })
        if (i+1) % 2000 == 0:
            encoder.save()
        if (i+1) % 30000 == 0:
            wandb.log({"reset_neurons": 0.0})
            freqs = get_freqs(50, local_encoder=encoder)
            # Re-initialise hidden units that fire on fewer than 10^-5.5 of the sampled rows.
            to_be_reset = (freqs<10**(-5.5))
            print("Resetting neurons!", to_be_reset.sum())
            re_init(to_be_reset, encoder)

except Exception as e:
    # Python 3 exceptions have no `.message`; report the real error and let it propagate
    # instead of replacing it with an AttributeError.
    print(f"Training stopped by {type(e).__name__}: {e}")
    raise
finally:
    # Always keep the latest weights, including on error or manual interruption.
    encoder.save()
# %%

{'act_name': 'blocks.0.hook_mlp_out',
 'act_size': 512,
 'batch_size': 4096,
 'beta1': 0.9,
 'beta2': 0.99,
 'buffer_batches': 1536,
 'buffer_mult': 384,
 'buffer_size': 1572864,
 'device': 'cuda',
 'dict_mult': 1,
 'dict_size': 512,
 'enc_dtype': 'fp32',
 'l1_coeff': 0.0003,
 'layer': 0,
 'lr': 0.0001,
 'model_batch_size': 64,
 'model_name': 'gelu-1l',
 'name': 'gelu-1l_0_512_mlp_out',
 'num_tokens': 2000000000,
 'remove_rare_dir': False,
 'seed': 49,
 'seq_len': 1024,
 'site': 'mlp_out'}
~~~~~~~~~~~ INIT AND NOOOO NORMALIZED!


  self.load_state_dict(torch.load(SAVE_DIR/(str(version)+".pt")))


VBox(children=(Label(value='0.026 MB of 0.026 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
below_1e-5,▁▁█▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
below_1e-6,▁▁█▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dead,▁▁█▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
l1_loss,▁██▆▆▅▄▅▅▄▄▃▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▁
l2_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,█▆▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
recons_score,▁▆▇█████████████████████████████████████
reset_neurons,▁▁

0,1
below_1e-5,0.0
below_1e-6,0.0
dead,0.0
l1_loss,57.0437
l2_loss,67.52884
loss,124.57254
recons_score,0.9131
reset_neurons,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111959168273542, max=1.0…

num_batches=488281


  0%|          | 0/488281 [00:00<?, ?it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 1490.86474609375, 'l2_loss': 722.5718994140625, 'l1_loss': 768.2927856445312}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.83747935295105 5.80426025390625 8.924461364746094
61.34%
Reconstruction: (0.6133697944292156, 3.83747935295105, 5.80426025390625, 8.924461364746094)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.40it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A
  0%|          | 1/488281 [00:04<612:57:25,  4.52s/it]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 29/488281 [00:04<13:20:42, 10.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 57/488281 [00:04<5:06:57, 26.51it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 85/488281 [00:05<2:44:38, 49.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 113/488281 [00:05<1:48:09, 75.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 350.74822998046875, 'l2_loss': 129.3211212158203, 'l1_loss': 221.4271240234375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 141/488281 [00:05<1:22:57, 98.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 169/488281 [00:05<1:11:06, 114.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 183/488281 [00:05<1:07:46, 120.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 211/488281 [00:06<1:45:39, 76.99it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 214.26229858398438, 'l2_loss': 114.30513000488281, 'l1_loss': 99.9571762084961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 239/488281 [00:06<1:22:15, 98.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 267/488281 [00:06<1:10:48, 114.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 295/488281 [00:07<1:05:13, 124.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 183.92733764648438,

  0%|          | 323/488281 [00:07<1:02:30, 130.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 351/488281 [00:07<1:01:08, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 379/488281 [00:07<1:00:29, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 407/488281 [00:08<1:41:58, 79.73it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 175.14035034179688, 'l2_loss': 112.49976348876953, 'l1_loss': 62.64058303833008}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  0%|          | 435/488281 [00:08<1:20:30, 101.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 463/488281 [00:08<1:10:16, 115.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 491/488281 [00:08<1:04:56, 125.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 519/488281 [00:09<1:02:21, 130.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 172.2711639404297, 'l2_loss': 112.68539428710938, 'l1_loss': 59.58576965332031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 547/488281 [00:09<1:01:04, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 575/488281 [00:09<1:00:26, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 589/488281 [00:09<2:00:00, 67.73it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 169.6572265625, 'l2_loss': 111.86811828613281, 'l1_loss

  0%|          | 617/488281 [00:10<1:29:20, 90.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 645/488281 [00:10<1:14:16, 109.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 673/488281 [00:10<1:06:54, 121.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 701/488281 [00:10<1:03:19, 128.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 167.78994750976562, 'l2_loss': 111.10804748535156, 'l1_loss': 56.68190383911133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  0%|          | 729/488281 [00:10<1:01:30, 132.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 757/488281 [00:11<1:00:38, 133.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 785/488281 [00:11<1:41:58, 79.68it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 813/488281 [00:11<1:20:29, 100.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 169.1166229248047, 'l2_loss': 112.23011779785156, 'l1_loss': 56.88650131225586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 841/488281 [00:12<1:09:55, 116.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 869/488281 [00:12<1:04:44, 125.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 897/488281 [00:12<1:02:11, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 168.81838989257812, 'l2_loss': 112.11116790771484, 'l1_loss': 56.70722579956055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  0%|          | 925/488281 [00:12<1:01:00, 133.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 953/488281 [00:12<1:00:22, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 981/488281 [00:13<1:41:47, 79.79it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 995/488281 [00:13<1:29:41, 90.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 169.15567016601562, 'l2_loss': 112.27291870117188, 'l1_loss': 56.882755279541016}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8410446643829346 4.664158821105957 8.914651870727539
83.78%
Reconstruction: (0.8377654944013583, 3.8410446643829346, 4.664158821105957, 8.914651870727539)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 15.89it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.17it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  0%|          | 1021/488281 [00:18<10:43:24, 12.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1049/488281 [00:18<5:41:17, 23.79it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1077/488281 [00:18<3:16:39, 41.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1105/488281 [00:18<2:06:36, 64.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 168.57501220703125, 'l2_loss': 111.76990509033203, 'l1_loss': 56.80511474609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  0%|          | 1133/488281 [00:19<1:32:26, 87.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1147/488281 [00:19<1:22:37, 98.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1175/488281 [00:19<1:52:31, 72.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1203/488281 [00:19<1:25:36, 94.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 168.16412353515625, 'l2_loss': 111.28641510009766, 'l1_loss': 56.87770462036133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  0%|          | 1231/488281 [00:20<1:12:24, 112.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1259/488281 [00:20<1:05:55, 123.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1287/488281 [00:20<1:02:44, 129.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1315/488281 [00:20<1:01:12, 132.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 166.0334930419922, 'l2_loss': 108.77884674072266, 'l1_loss': 57.2546501159668}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  0%|          | 1343/488281 [00:20<1:00:27, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1357/488281 [00:21<1:59:42, 67.79it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1385/488281 [00:21<1:29:06, 91.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1413/488281 [00:21<1:14:08, 109.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 167.20021057128906, 'l2_loss': 108.35718536376953, 'l1_loss': 58.843021392822266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shap

  0%|          | 1441/488281 [00:22<1:06:45, 121.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1469/488281 [00:22<1:03:08, 128.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1497/488281 [00:22<1:01:37, 131.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 164.6986846923828, 'l2_loss': 104.61775207519531, 'l1_loss': 60.080936431884766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  0%|          | 1525/488281 [00:22<1:00:38, 133.77it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1553/488281 [00:23<1:41:51, 79.65it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1581/488281 [00:23<1:20:20, 100.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1609/488281 [00:23<1:09:49, 116.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 163.86744689941406, 'l2_loss': 102.261962890625, 'l1_loss': 61.6054801940918}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  0%|          | 1637/488281 [00:23<1:04:37, 125.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1665/488281 [00:24<1:02:05, 130.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1693/488281 [00:24<1:00:51, 133.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 161.3959503173828, 

  0%|          | 1721/488281 [00:24<1:00:16, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1749/488281 [00:25<1:41:35, 79.82it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1777/488281 [00:25<1:20:11, 101.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1805/488281 [00:25<1:09:43, 116.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 160.01907348632812, 'l2_loss': 95.49172973632812, 'l1_loss': 64.52735137939453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 1833/488281 [00:25<1:04:35, 125.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1861/488281 [00:25<1:02:03, 130.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1889/488281 [00:26<1:00:49, 133.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1917/488281 [00:26<1:00:14, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 158.86842346191406, 'l2_loss': 93.16683959960938, 'l1_loss': 65.70158386230469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1945/488281 [00:26<1:41:27, 79.89it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 1973/488281 [00:26<1:20:07, 101.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 1987/488281 [00:27<1:13:57, 109.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 2000/488281 [00:27<1:12:17, 112.10it/s]

Saved as version 17
acts.shape=torch.Size([4096, 4096])
{'loss': 158.3299560546875, 'l2_loss': 91.68812561035156, 'l1_loss': 66.6418228149414}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8634750843048096 4.486502170562744 8.97762680053711
87.82%
Reconstruction: (0.8781758694642459, 3.8634750843048096, 4.486502170562744, 8.97762680053711)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.25it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0164, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  0%|          | 2027/488281 [00:31<10:28:04, 12.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2055/488281 [00:32<5:34:27, 24.23it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2083/488281 [00:32<3:13:23, 41.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2111/488281 [00:32<2:04:58, 64.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 156.56246948242188, 'l2_loss': 89.60678100585938, 'l1_loss': 66.95569610595703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 2125/488281 [00:32<2:44:36, 49.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 2153/488281 [00:33<1:51:00, 72.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2181/488281 [00:33<1:24:45, 95.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2209/488281 [00:33<1:11:56, 112.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 154.20899963378906, 'l2_loss': 87.18257141113281, 'l1_loss': 67.02642822265625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 2237/488281 [00:33<1:05:37, 123.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2265/488281 [00:34<1:02:31, 129.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2293/488281 [00:34<1:01:01, 132.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 153.4197998046875, 'l2_loss': 86.02623748779297, 'l1_loss': 67.39356994628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 2321/488281 [00:34<1:41:54, 79.48it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  0%|          | 2349/488281 [00:34<1:20:19, 100.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2377/488281 [00:35<1:09:44, 116.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  0%|          | 2405/488281 [00:35<1:04:36, 125.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 152.04098510742188, 'l2_loss': 84.34030151367188, 'l1_loss': 67.70069122314453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  0%|          | 2433/488281 [00:35<1:02:01, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2461/488281 [00:35<1:00:47, 133.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2489/488281 [00:35<1:00:10, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 2517/488281 [00:36<1:41:37, 79.66it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 150.9384765625, 'l2_loss': 83.52848052978516, 'l1_loss': 67.41000366210938}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  1%|          | 2545/488281 [00:36<1:20:11, 100.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2573/488281 [00:36<1:09:40, 116.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2601/488281 [00:37<1:04:33, 125.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 149.9495849609375, 'l2_loss': 82.51228332519531, 'l1_loss': 67.43730926513672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|          | 2629/488281 [00:37<1:02:01, 130.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2657/488281 [00:37<1:00:47, 133.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2685/488281 [00:37<1:00:10, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 2713/488281 [00:38<1:41:29, 79.73it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 148.80039978027344, 'l2_loss': 81.39141845703125, 'l1_loss': 67.40898132324219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 2741/488281 [00:38<1:20:07, 100.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2769/488281 [00:38<1:09:41, 116.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2797/488281 [00:38<1:04:34, 125.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2825/488281 [00:39<1:02:06, 130.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 148.6949462890625, 'l2_loss': 80.994873046875, 'l1_loss': 67.70006561279297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  1%|          | 2853/488281 [00:39<1:00:53, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2867/488281 [00:39<1:00:33, 133.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 2895/488281 [00:40<1:41:39, 79.58it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 147.88424682617188, 'l2_loss': 80.36487579345703, 'l1_loss': 67.51937103271484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 2923/488281 [00:40<1:20:20, 100.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2951/488281 [00:40<1:09:53, 115.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2979/488281 [00:40<1:04:44, 124.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 2993/488281 [00:40<1:03:15, 127.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 147.8463134765625, 'l2_loss': 80.02104949951172, 'l1_loss': 67.82527160644531}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.857891798019409 4.3773298263549805 8.949834823608398
89.80%
Reconstruction: (0.8979882481549394, 3.857891798019409, 4.3773298263549805, 8.949834823608398)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.23it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.1707, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  1%|          | 3021/488281 [00:45<10:08:51, 13.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3049/488281 [00:45<5:28:50, 24.59it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3063/488281 [00:45<4:08:08, 32.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3090/488281 [00:46<3:15:34, 41.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3118/488281 [00:46<2:05:52, 64.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 145.8404541015625, 'l2_loss': 78.71461486816406, 'l1_loss': 67.1258316040039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  1%|          | 3146/488281 [00:46<1:32:04, 87.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3174/488281 [00:46<1:15:36, 106.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3202/488281 [00:47<1:07:36, 119.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 145.3046875, 'l2_loss': 78.1419906616211, 'l1_loss': 67.16270446777344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Si

  1%|          | 3230/488281 [00:47<1:03:39, 127.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3258/488281 [00:47<1:01:42, 130.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3286/488281 [00:48<1:42:14, 79.06it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3314/488281 [00:48<1:20:37, 100.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 144.91488647460938, 'l2_loss': 78.14351654052734, 'l1_loss': 66.7713623046875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|          | 3342/488281 [00:48<1:10:00, 115.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3370/488281 [00:48<1:04:47, 124.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3398/488281 [00:48<1:02:14, 129.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 144.72341918945312,

  1%|          | 3426/488281 [00:49<1:01:00, 132.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3454/488281 [00:49<1:00:23, 133.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3468/488281 [00:49<1:59:24, 67.67it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3496/488281 [00:50<1:28:59, 90.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 145.01141357421875, 'l2_loss': 77.67105102539062, 'l1_loss': 67.34036254882812}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 3524/488281 [00:50<1:14:29, 108.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3552/488281 [00:50<1:06:58, 120.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3580/488281 [00:50<1:03:16, 127.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3608/488281 [00:50<1:01:29, 131.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 144.15960693359375, 'l2_loss': 77.06556701660156, 'l1_loss': 67.09403991699219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 3636/488281 [00:51<1:00:35, 133.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3664/488281 [00:51<1:41:40, 79.44it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3692/488281 [00:51<1:20:16, 100.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 142.30169677734375,

  1%|          | 3720/488281 [00:52<1:09:47, 115.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3748/488281 [00:52<1:04:38, 124.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3776/488281 [00:52<1:02:07, 129.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3804/488281 [00:52<1:00:55, 132.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 142.89974975585938, 'l2_loss': 76.47056579589844, 'l1_loss': 66.4291763305664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|          | 3832/488281 [00:52<1:00:19, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3860/488281 [00:53<1:41:27, 79.57it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 3888/488281 [00:53<1:20:10, 100.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3916/488281 [00:53<1:09:45, 115.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 143.03175354003906, 'l2_loss': 76.26344299316406, 'l1_loss': 66.768310546875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  1%|          | 3944/488281 [00:54<1:04:37, 124.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 3972/488281 [00:54<1:02:05, 129.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4000/488281 [00:54<1:03:17, 127.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 18
acts.shape=torch.Size([4096, 4096])
{'loss': 141.62081909179688, 'l2_loss': 75.26953125, 'l1_loss': 66.3512954711914}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 102


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.32it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.1506, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  1%|          | 4027/488281 [00:59<10:16:02, 13.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4052/488281 [00:59<6:26:41, 20.87it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4080/488281 [00:59<3:35:30, 37.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4108/488281 [01:00<2:15:01, 59.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 142.30682373046875, 'l2_loss': 75.36436462402344, 'l1_loss': 66.94245147705078}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 4136/488281 [01:00<1:36:19, 83.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4164/488281 [01:00<1:17:33, 104.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4192/488281 [01:00<1:08:24, 117.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4220/488281 [01:00<1:03:57, 126.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 141.4639892578125, 'l2_loss': 75.0008316040039, 'l1_loss': 66.46315002441406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4248/488281 [01:01<1:43:10, 78.19it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4276/488281 [01:01<1:20:57, 99.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4304/488281 [01:01<1:10:06, 115.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 141.2493438720703, 'l2_loss': 74.5347671508789, 'l1_loss': 66.7145767211914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  1%|          | 4332/488281 [01:02<1:04:45, 124.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4360/488281 [01:02<1:02:07, 129.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4388/488281 [01:02<1:00:50, 132.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4402/488281 [01:02<1:00:29, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 141.59033203125, 'l2_loss': 74.94725036621094, 'l1_loss': 66.64308166503906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4430/488281 [01:03<1:41:27, 79.48it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4458/488281 [01:03<1:20:06, 100.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4486/488281 [01:03<1:09:40, 115.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4514/488281 [01:03<1:04:33, 124.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 142.03662109375, 'l2_loss': 75.09136962890625, 'l1_loss': 66.94525146484375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  1%|          | 4542/488281 [01:03<1:02:01, 130.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4570/488281 [01:04<1:01:00, 132.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4598/488281 [01:04<1:00:17, 133.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 140.18228149414062, 'l2_loss': 74.00698852539062, 'l1_loss': 66.17528533935547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 4626/488281 [01:04<1:41:27, 79.45it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4654/488281 [01:05<1:20:06, 100.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4682/488281 [01:05<1:09:38, 115.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4710/488281 [01:05<1:04:32, 124.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 140.34283447265625, 'l2_loss': 74.6292724609375, 'l1_loss': 65.71356964111328}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|          | 4738/488281 [01:05<1:02:01, 129.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4766/488281 [01:05<1:00:46, 132.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4794/488281 [01:06<1:00:10, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 4822/488281 [01:06<1:41:14, 79.58it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 140.2716064453125, 'l2_loss': 73.9998779296875, 'l1_loss': 66.27173614501953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  1%|          | 4850/488281 [01:06<1:19:58, 100.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4878/488281 [01:07<1:09:32, 115.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4906/488281 [01:07<1:04:27, 124.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 139.69668579101562, 'l2_loss': 73.29170227050781, 'l1_loss': 66.40498352050781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 4934/488281 [01:07<1:01:55, 130.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4962/488281 [01:07<1:00:41, 132.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 4990/488281 [01:07<1:00:05, 134.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.31it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0977, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  1%|          | 5018/488281 [01:13<10:45:36, 12.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5046/488281 [01:13<5:47:05, 23.20it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5074/488281 [01:13<3:20:24, 40.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5102/488281 [01:13<2:08:33, 62.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 139.3402099609375, 'l2_loss': 73.478515625, 'l1_loss': 65.86170196533203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.

  1%|          | 5130/488281 [01:13<1:33:20, 86.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5158/488281 [01:14<1:16:05, 105.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5172/488281 [01:14<1:11:06, 113.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5200/488281 [01:14<1:46:30, 75.59it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 138.2247772216797, 'l2_loss': 73.2082748413086, 'l1_loss': 65.0165023803711}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  1%|          | 5228/488281 [01:14<1:22:31, 97.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5256/488281 [01:15<1:10:45, 113.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5284/488281 [01:15<1:04:58, 123.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5312/488281 [01:15<1:02:09, 129.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 139.03933715820312, 'l2_loss': 72.88753509521484, 'l1_loss': 66.15180206298828}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 5340/488281 [01:15<1:00:45, 132.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5368/488281 [01:15<1:00:04, 133.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5396/488281 [01:16<1:41:04, 79.62it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 138.72793579101562, 'l2_loss': 73.21202850341797, 'l1_l

  1%|          | 5424/488281 [01:16<1:19:51, 100.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5452/488281 [01:16<1:09:25, 115.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5480/488281 [01:17<1:04:18, 125.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5508/488281 [01:17<1:01:49, 130.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 137.37319946289062, 'l2_loss': 72.56304168701172, 'l1_loss': 64.81015014648438}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 5536/488281 [01:17<1:00:33, 132.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5564/488281 [01:17<59:56, 134.23it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5592/488281 [01:18<1:41:06, 79.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5620/488281 [01:18<1:19:48, 100.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 137.56893920898438, 'l2_loss': 72.36351776123047, 'l1_loss': 65.20541381835938}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 5648/488281 [01:18<1:09:20, 116.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5676/488281 [01:18<1:04:13, 125.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5704/488281 [01:19<1:01:45, 130.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 138.61111450195312, 'l2_loss': 72.80856323242188, 'l1_loss': 65.80255889892578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 5732/488281 [01:19<1:00:29, 132.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5746/488281 [01:19<1:00:08, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5774/488281 [01:19<1:40:56, 79.66it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5802/488281 [01:20<1:19:43, 100.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 137.29409790039062, 'l2_loss': 72.42192077636719, 'l1_loss': 64.87217712402344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 5830/488281 [01:20<1:09:16, 116.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5858/488281 [01:20<1:04:09, 125.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5886/488281 [01:20<1:01:38, 130.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 5914/488281 [01:21<1:00:26, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 137.90579223632812, 'l2_loss': 72.57383728027344, 'l1_loss': 65.33196258544922}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  1%|          | 5942/488281 [01:21<59:49, 134.36it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5970/488281 [01:21<1:40:48, 79.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 5998/488281 [01:21<1:19:35, 100.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 19
acts.shape=torch.Size([4096, 4096])
{'loss': 137.448486328125, 'l2_loss': 72.13505554199219, 'l1_loss': 65.31342315673828}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 17.78it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 12.00it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.91it/s]
  1%|          | 6011/488281 [01:26<14:35:01,  9.19it/s]

acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0615, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 6039/488281 [01:26<7:34:33, 17.68it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|          | 6067/488281 [01:27<4:12:13, 31.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|          | 6095/488281 [01:27<2:33:34, 52.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6123/488281 [01:27<1:45:27, 76.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 137.6787872314453, 'l2_loss': 72.38927459716797, 'l1_loss': 65.28951263427734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|▏         | 6137/488281 [01:27<1:31:34, 87.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6165/488281 [01:28<1:56:18, 69.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6193/488281 [01:28<1:27:11, 92.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6221/488281 [01:28<1:12:57, 110.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 137.25784301757812, 'l2_loss': 72.47172546386719, 'l1_loss': 64.7861099243164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|▏         | 6249/488281 [01:28<1:05:57, 121.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6277/488281 [01:28<1:02:31, 128.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6305/488281 [01:29<1:00:53, 131.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.858154296875, 'l2_loss': 71.78781127929688, 'l1_loss': 64.0703353881836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  1%|▏         | 6333/488281 [01:29<1:00:02, 133.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6361/488281 [01:29<1:40:48, 79.68it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6389/488281 [01:30<1:19:35, 100.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6417/488281 [01:30<1:09:13, 116.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 136.75054931640625, 'l2_loss': 71.9384765625, 'l1_loss': 64.81208038330078}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  1%|▏         | 6445/488281 [01:30<1:04:06, 125.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6473/488281 [01:30<1:01:36, 130.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6501/488281 [01:30<1:00:25, 132.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 136.56295776367188,

  1%|▏         | 6515/488281 [01:31<1:00:02, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6543/488281 [01:31<1:40:45, 79.69it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6571/488281 [01:31<1:19:33, 100.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6599/488281 [01:31<1:09:25, 115.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.89744567871094, 'l2_loss': 71.6600341796875, 'l1_loss': 64.23741149902344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|▏         | 6627/488281 [01:32<1:04:14, 124.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6655/488281 [01:32<1:01:37, 130.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6683/488281 [01:32<1:00:24, 132.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6711/488281 [01:32<59:49, 134.15it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 136.44749450683594, 'l2_loss': 71.69541931152344, 'l1_loss': 64.7520751953125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  1%|▏         | 6739/488281 [01:33<1:40:43, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6767/488281 [01:33<1:19:30, 100.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6795/488281 [01:33<1:09:07, 116.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 136.45755004882812,

  1%|▏         | 6823/488281 [01:33<1:04:04, 125.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6851/488281 [01:34<1:01:32, 130.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6879/488281 [01:34<1:00:19, 133.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6907/488281 [01:34<59:44, 134.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.90850830078125, 'l2_loss': 71.48468017578125, 'l1_loss': 64.42382049560547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6935/488281 [01:35<1:40:33, 79.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 6963/488281 [01:35<1:19:25, 101.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 6991/488281 [01:35<1:09:04, 116.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 16.36it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.26it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0205, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  1%|▏         | 7019/488281 [01:40<10:09:00, 13.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7047/488281 [01:40<5:28:32, 24.41it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7075/488281 [01:40<3:11:08, 41.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7103/488281 [01:40<2:04:00, 64.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.44171142578125, 'l2_loss': 71.38079833984375, 'l1_loss': 64.06090545654297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 7117/488281 [01:41<2:43:14, 49.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 7145/488281 [01:41<1:50:07, 72.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7173/488281 [01:41<1:24:05, 95.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7201/488281 [01:41<1:11:22, 112.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.409912109375, 'l2_loss': 71.34226989746094, 'l1_loss': 64.06763458251953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  1%|▏         | 7229/488281 [01:42<1:05:06, 123.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7257/488281 [01:42<1:02:02, 129.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  1%|▏         | 7285/488281 [01:42<1:00:32, 132.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  1%|▏         | 7313/488281 [01:43<1:40:54, 79.44it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.98736572265625, 'l2_loss': 71.28791809082031, 'l1_loss': 64.69945526123047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 7341/488281 [01:43<1:19:34, 100.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7369/488281 [01:43<1:09:07, 115.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7397/488281 [01:43<1:03:59, 125.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.5945281982422, 'l2_loss': 71.34371948242188, 'l1_loss': 64.25080871582031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 7425/488281 [01:43<1:01:29, 130.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7453/488281 [01:44<1:00:16, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7481/488281 [01:44<59:40, 134.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 7509/488281 [01:44<1:40:29, 79.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.65713500976562, 'l2_loss': 71.51671600341797, 'l1_loss': 64.14042663574219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 7537/488281 [01:45<1:19:21, 100.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7565/488281 [01:45<1:08:59, 116.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7593/488281 [01:45<1:03:54, 125.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7621/488281 [01:45<1:01:47, 129.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.40895080566406, 'l2_loss': 71.15641784667969, 'l1_loss': 64.25253295898438}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 7649/488281 [01:45<1:00:24, 132.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7677/488281 [01:46<1:00:00, 133.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 7691/488281 [01:46<2:00:01, 66.73it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 134.760986328125, 'l2_loss': 70.99850463867188, 'l1_loss': 63.76248550415039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  2%|▏         | 7719/488281 [01:46<1:28:55, 90.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7747/488281 [01:47<1:13:38, 108.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7775/488281 [01:47<1:06:08, 121.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7803/488281 [01:47<1:02:30, 128.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 134.5037078857422, 'l2_loss': 70.91525268554688, 'l1_loss': 63.58845901489258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 7831/488281 [01:47<1:00:41, 131.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7859/488281 [01:47<59:48, 133.89it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 7887/488281 [01:48<1:40:43, 79.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 7915/488281 [01:48<1:19:25, 100.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 136.36961364746094, 'l2_loss': 71.45767211914062, 'l1_loss': 64.91194152832031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 7943/488281 [01:48<1:08:57, 116.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7971/488281 [01:49<1:03:50, 125.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 7999/488281 [01:49<1:01:19, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 20
acts.shape=torch.Size([4096, 4096])
{'loss': 134.95620727539062, 'l2_loss': 71.02203369140625, 'l1_loss': 63.934165954589844}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9237


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.20it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.84it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0200, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  2%|▏         | 8027/488281 [01:53<10:03:07, 13.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8055/488281 [01:54<5:25:33, 24.58it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8082/488281 [01:54<3:53:07, 34.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8110/488281 [01:54<2:23:56, 55.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 135.3330078125, 'l2_loss': 71.12059020996094, 'l1_loss': 64.2124252319336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch

  2%|▏         | 8138/488281 [01:55<1:40:25, 79.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8166/488281 [01:55<1:19:11, 101.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8194/488281 [01:55<1:08:49, 116.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8222/488281 [01:55<1:03:48, 125.40it/s]

{'loss': 134.6437225341797, 'l2_loss': 70.97689819335938, 'l1_loss': 63.66682815551758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 8250/488281 [01:55<1:01:17, 130.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8278/488281 [01:56<1:41:14, 79.02it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8306/488281 [01:56<1:19:40, 100.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 134.98312377929688, 'l2_loss': 71.14509582519531, 'l1_loss': 63.8380241394043}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 8334/488281 [01:56<1:09:03, 115.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8362/488281 [01:57<1:03:51, 125.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8390/488281 [01:57<1:01:18, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8418/488281 [01:57<1:00:04, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.97891235351562, 'l2_loss': 70.4326400756836, 'l1_loss': 63.5462760925293}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  2%|▏         | 8446/488281 [01:57<59:27, 134.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8460/488281 [01:58<1:57:58, 67.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8488/488281 [01:58<1:27:49, 91.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 134.56240844726562,

  2%|▏         | 8516/488281 [01:58<1:13:03, 109.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8544/488281 [01:58<1:05:49, 121.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8572/488281 [01:59<1:02:14, 128.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8600/488281 [01:59<1:00:29, 132.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 134.03770446777344, 'l2_loss': 70.5731201171875, 'l1_loss': 63.46458053588867}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 8628/488281 [01:59<59:41, 133.93it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8656/488281 [01:59<1:40:31, 79.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8684/488281 [02:00<1:19:15, 100.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8712/488281 [02:00<1:08:52, 116.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 134.0242919921875, 'l2_loss': 70.44080352783203, 'l1_loss': 63.5834846496582}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  2%|▏         | 8740/488281 [02:00<1:03:44, 125.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8768/488281 [02:00<1:01:13, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8796/488281 [02:01<1:00:00, 133.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.64340209960938, 'l2_loss': 70.24034118652344, 'l1_loss': 63.403053283691406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  2%|▏         | 8824/488281 [02:01<59:24, 134.50it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8852/488281 [02:01<1:40:08, 79.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 8880/488281 [02:01<1:19:03, 101.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8908/488281 [02:02<1:08:44, 116.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.4265899658203, 'l2_loss': 70.31281280517578, 'l1_loss': 63.113773345947266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 8936/488281 [02:02<1:03:39, 125.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8964/488281 [02:02<1:01:10, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 8992/488281 [02:02<59:57, 133.24it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.31it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0132, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  2%|▏         | 9020/488281 [02:07<9:59:14, 13.33it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9045/488281 [02:08<6:18:50, 21.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9073/488281 [02:08<3:31:54, 37.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9101/488281 [02:08<2:12:59, 60.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.1292724609375, 'l2_loss': 69.92524719238281, 'l1_loss': 63.20403289794922}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 9129/488281 [02:08<1:35:08, 83.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9157/488281 [02:08<1:16:31, 104.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9185/488281 [02:09<1:07:27, 118.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9213/488281 [02:09<1:03:03, 126.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.82296752929688, 'l2_loss': 70.43883514404297, 'l1_loss': 63.384132385253906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9241/488281 [02:09<1:41:58, 78.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9269/488281 [02:10<1:19:56, 99.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9297/488281 [02:10<1:09:07, 115.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9325/488281 [02:10<1:03:51, 125.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.70127868652344, 'l2_loss': 70.2587661743164, 'l1_loss': 63.442508697509766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 9353/488281 [02:10<1:01:15, 130.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9381/488281 [02:10<59:58, 133.08it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9395/488281 [02:10<59:36, 133.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.721923828125, 'l2_loss': 70.43116760253906, 'l1_loss': 62.290748596191406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9423/488281 [02:11<1:40:11, 79.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9451/488281 [02:11<1:19:02, 100.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9479/488281 [02:11<1:08:41, 116.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9507/488281 [02:12<1:03:39, 125.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.74264526367188, 'l2_loss': 70.3740234375, 'l1_loss': 63.36861801147461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  2%|▏         | 9535/488281 [02:12<1:01:07, 130.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9563/488281 [02:12<59:53, 133.22it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9591/488281 [02:12<59:18, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9619/488281 [02:13<1:40:01, 79.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.82730102539062, 'l2_loss': 70.49070739746094, 'l1_loss': 63.33659744262695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 9647/488281 [02:13<1:18:56, 101.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9675/488281 [02:13<1:08:45, 116.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9703/488281 [02:13<1:03:39, 125.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.78749084472656, 'l2_loss': 69.70040893554688, 'l1_loss': 63.08708572387695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 9731/488281 [02:14<1:01:06, 130.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9759/488281 [02:14<59:53, 133.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9787/488281 [02:14<59:16, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9815/488281 [02:15<1:40:05, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.88070678710938, 'l2_loss': 69.97350311279297, 'l1_loss': 62.90719985961914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 9843/488281 [02:15<1:18:58, 100.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9871/488281 [02:15<1:08:36, 116.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9899/488281 [02:15<1:03:31, 125.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.36465454101562,

  2%|▏         | 9927/488281 [02:15<1:01:04, 130.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9955/488281 [02:16<59:50, 133.21it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 9983/488281 [02:16<59:14, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 9997/488281 [02:16<1:57:29, 67.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 21
acts.shape=torch.Size([4096, 4096])
{'loss': 133.34078979492188, 'l2_loss': 70.05208587646484, 'l1_loss': 63.2887077331543}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9736523628234863 4.3787946701049805 8.9440


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.31it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0117, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  2%|▏         | 10022/488281 [02:21<10:54:51, 12.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10050/488281 [02:21<5:43:56, 23.17it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10078/488281 [02:21<3:16:51, 40.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10106/488281 [02:22<2:06:02, 63.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.2162628173828, 'l2_loss': 70.0018310546875, 'l1_loss': 63.21443557739258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  2%|▏         | 10134/488281 [02:22<1:31:35, 87.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10162/488281 [02:22<1:14:59, 106.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10190/488281 [02:23<1:47:35, 74.06it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.4615020751953, 'l2_loss': 70.29037475585938, 'l1_lo

  2%|▏         | 10218/488281 [02:23<1:22:38, 96.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10246/488281 [02:23<1:10:24, 113.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10274/488281 [02:23<1:04:23, 123.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10302/488281 [02:23<1:01:28, 129.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.5483856201172, 'l2_loss': 70.24239349365234, 'l1_loss': 63.30598831176758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 10330/488281 [02:24<1:00:01, 132.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10358/488281 [02:24<59:19, 134.28it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10386/488281 [02:24<1:39:52, 79.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10414/488281 [02:25<1:18:52, 100.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.18692016601562, 'l2_loss': 69.48180389404297, 'l1_loss': 62.70512008666992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 10442/488281 [02:25<1:08:31, 116.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10470/488281 [02:25<1:03:27, 125.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10498/488281 [02:25<1:00:59, 130.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.24252319335938, 'l2_loss': 69.87264251708984, 'l1_loss': 62.36988067626953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 10526/488281 [02:25<59:47, 133.16it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10554/488281 [02:26<59:11, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10582/488281 [02:26<1:39:46, 79.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10610/488281 [02:26<1:18:48, 101.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.45635986328125, 'l2_loss': 69.66220092773438, 'l1_loss': 62.794151306152344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  2%|▏         | 10638/488281 [02:27<1:08:29, 116.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10666/488281 [02:27<1:03:26, 125.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10694/488281 [02:27<1:01:06, 130.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10722/488281 [02:27<59:51, 132.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.57241821289062, 'l2_loss': 69.5302734375, 'l1_loss': 62.042137145996094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  2%|▏         | 10750/488281 [02:27<59:12, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10764/488281 [02:28<1:57:30, 67.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10792/488281 [02:28<1:27:27, 91.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.5390625, 'l2_loss': 69.41775512695312, 'l1_loss': 62.12130355834961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.S

  2%|▏         | 10820/488281 [02:28<1:12:44, 109.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10848/488281 [02:28<1:05:30, 121.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10876/488281 [02:29<1:01:57, 128.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 10904/488281 [02:29<1:00:15, 132.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.9422607421875, 'l2_loss': 69.0400390625, 'l1_loss': 62.90222930908203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch

  2%|▏         | 10932/488281 [02:29<59:23, 133.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10960/488281 [02:30<1:39:50, 79.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 10988/488281 [02:30<1:18:47, 100.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.28it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A
  2%|▏         | 11001/488281 [02:34<14:17:38,  9.27it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0044, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11029/488281 [02:35<7:25:46, 17.84it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11057/488281 [02:35<4:07:15, 32.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11085/488281 [02:35<2:30:45, 52.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11113/488281 [02:35<1:43:41, 76.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.30477905273438, 'l2_loss': 68.8753662109375, 'l1_loss': 62.42941665649414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  2%|▏         | 11127/488281 [02:35<1:30:09, 88.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11155/488281 [02:36<1:54:48, 69.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11183/488281 [02:36<1:26:22, 92.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11211/488281 [02:36<1:12:11, 110.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 132.33779907226562, 'l2_loss': 69.46755981445312, 'l1_loss': 62.870243072509766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  2%|▏         | 11239/488281 [02:36<1:05:12, 121.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11267/488281 [02:37<1:01:47, 128.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11295/488281 [02:37<1:00:07, 132.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11323/488281 [02:37<59:19, 133.98it/s]  

{'loss': 131.6577606201172, 'l2_loss': 69.38249969482422, 'l1_loss': 62.275264739990234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 11351/488281 [02:38<1:39:51, 79.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11379/488281 [02:38<1:18:45, 100.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11407/488281 [02:38<1:08:26, 116.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 133.16493225097656, 'l2_loss': 70.07110595703125, 'l1_loss': 63.09382629394531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 11435/488281 [02:38<1:03:21, 125.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11463/488281 [02:38<1:00:52, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11491/488281 [02:39<59:38, 133.22it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11519/488281 [02:39<59:04, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.76565551757812, 'l2_loss': 69.08444213867188, 'l1_loss': 62.681217193603516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11533/488281 [02:39<1:57:11, 67.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11561/488281 [02:40<1:27:15, 91.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11589/488281 [02:40<1:12:33, 109.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.65805053710938,

  2%|▏         | 11617/488281 [02:40<1:05:24, 121.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11645/488281 [02:40<1:01:51, 128.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11673/488281 [02:40<1:00:07, 132.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11701/488281 [02:41<59:39, 133.14it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.9551544189453, 'l2_loss': 69.22578430175781, 'l1_loss': 62.729366302490234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11729/488281 [02:41<1:39:55, 79.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11757/488281 [02:41<1:18:46, 100.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11785/488281 [02:42<1:08:24, 116.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11813/488281 [02:42<1:03:20, 125.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.4182586669922, 'l2_loss': 68.63617706298828, 'l1_loss': 62.782081604003906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  2%|▏         | 11841/488281 [02:42<1:00:49, 130.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11869/488281 [02:42<59:36, 133.20it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11897/488281 [02:42<59:00, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.77557373046875, 'l2_loss': 68.71858215332031, 'l1_loss': 62.05699157714844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11925/488281 [02:43<1:39:31, 79.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 11953/488281 [02:43<1:18:33, 101.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11981/488281 [02:43<1:08:16, 116.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 11995/488281 [02:43<1:05:18, 121.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 22
acts.shape=torch.Size([4096, 4096])
{'loss': 131.64346313476562, 'l2_loss': 69.45118713378906, 'l1_loss': 62.1922721862793}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.905557155609131 4.321825981140137 8.990290641784668
91.81%
Reconstruction: (0.9181335999885215, 3.905557155609131, 4.321825981140137, 8.99029064178466


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.28it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0068, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  2%|▏         | 12023/488281 [02:48<10:00:05, 13.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 12051/488281 [02:48<5:23:49, 24.51it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 12079/488281 [02:49<3:08:27, 42.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 12093/488281 [02:49<2:29:27, 53.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 12121/488281 [02:49<2:23:37, 55.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.51600646972656, 'l2_loss': 69.4638671875, 'l1_loss': 62.05213928222656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  2%|▏         | 12149/488281 [02:49<1:40:08, 79.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  2%|▏         | 12163/488281 [02:50<1:27:45, 90.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  2%|▏         | 12191/488281 [02:50<1:13:35, 107.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.40499877929688, 'l2_loss': 68.75977325439453, 'l1_loss': 62.645233154296875}
acts.shape=torch.Size([4096, 4096])
acts.shape

  3%|▎         | 12219/488281 [02:50<1:05:50, 120.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12247/488281 [02:50<1:02:02, 127.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12275/488281 [02:50<1:00:10, 131.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 12303/488281 [02:51<1:40:08, 79.22it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.33363342285156, 'l2_loss': 68.8555908203125, 'l1_loss': 62.4780387878418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 12331/488281 [02:51<1:18:49, 100.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12359/488281 [02:51<1:08:23, 115.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12387/488281 [02:52<1:03:15, 125.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12415/488281 [02:52<1:00:46, 130.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.01368713378906, 'l2_loss': 68.61228942871094, 'l1_loss': 62.40140151977539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 12443/488281 [02:52<59:33, 133.17it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12471/488281 [02:52<58:57, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 12499/488281 [02:53<1:39:23, 79.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.60623168945312, 'l2_loss': 68.44866943359375, 'l1_loss': 62.15756607055664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 12527/488281 [02:53<1:18:28, 101.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12555/488281 [02:53<1:08:12, 116.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12583/488281 [02:53<1:03:10, 125.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12611/488281 [02:54<1:00:44, 130.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.93243408203125, 'l2_loss': 68.96951293945312, 'l1_loss': 61.96291732788086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 12639/488281 [02:54<59:31, 133.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12667/488281 [02:54<58:55, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 12695/488281 [02:54<1:39:20, 79.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 12723/488281 [02:55<1:18:43, 100.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.99720764160156, 'l2_loss': 69.20649719238281, 'l1_loss': 62.790706634521484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  3%|▎         | 12751/488281 [02:55<1:08:19, 116.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12779/488281 [02:55<1:03:13, 125.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12807/488281 [02:55<1:00:44, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.1693878173828, 'l2_loss': 68.69376373291016, 'l1_loss': 62.47562789916992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 12835/488281 [02:56<59:31, 133.13it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12863/488281 [02:56<58:54, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 12877/488281 [02:56<1:56:57, 67.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 12905/488281 [02:56<1:27:04, 90.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.5807342529297, 'l2_loss': 69.09645080566406, 'l1_loss': 62.484283447265625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 12933/488281 [02:57<1:12:23, 109.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12961/488281 [02:57<1:05:11, 121.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 12989/488281 [02:57<1:01:40, 128.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.6659698486328, 


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0056, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  3%|▎         | 13017/488281 [03:02<9:55:46, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13045/488281 [03:02<5:21:39, 24.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13072/488281 [03:02<3:50:28, 34.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13100/488281 [03:03<2:22:00, 55.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.14193725585938, 'l2_loss': 69.05198669433594, 'l1_loss': 62.08995819091797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 13128/488281 [03:03<1:39:10, 79.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13156/488281 [03:03<1:18:17, 101.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13184/488281 [03:03<1:08:03, 116.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13212/488281 [03:03<1:03:22, 124.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.80191040039062, 'l2_loss': 68.35406494140625, 'l1_loss': 61.44784927368164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 13240/488281 [03:04<1:00:46, 130.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13268/488281 [03:04<1:40:14, 78.98it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13296/488281 [03:04<1:18:49, 100.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.07504272460938,

  3%|▎         | 13324/488281 [03:05<1:08:22, 115.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13352/488281 [03:05<1:03:12, 125.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13380/488281 [03:05<1:00:40, 130.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13408/488281 [03:05<59:28, 133.07it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.86260986328125, 'l2_loss': 68.19601440429688, 'l1_loss': 62.66659927368164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 13436/488281 [03:05<58:51, 134.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13464/488281 [03:06<1:39:08, 79.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13492/488281 [03:06<1:18:17, 101.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13520/488281 [03:06<1:08:04, 116.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.2881317138672, 'l2_loss': 68.69044494628906, 'l1_loss': 62.59768295288086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 13548/488281 [03:07<1:03:03, 125.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13576/488281 [03:07<1:00:35, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13604/488281 [03:07<59:25, 133.15it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.55804443359375, 'l2_loss': 68.1576156616211, 'l1_loss': 62.40042495727539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 13618/488281 [03:07<59:03, 133.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13646/488281 [03:08<1:39:14, 79.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13674/488281 [03:08<1:18:18, 101.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13702/488281 [03:08<1:08:05, 116.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.94406127929688, 'l2_loss': 69.23469543457031, 'l1_loss': 62.70936965942383}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 13730/488281 [03:08<1:03:02, 125.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13758/488281 [03:09<1:00:47, 130.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13786/488281 [03:09<59:27, 133.00it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13814/488281 [03:09<58:51, 134.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.33995056152344, 'l2_loss': 68.331298828125, 'l1_loss': 62.00864791870117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 13842/488281 [03:09<1:39:11, 79.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 13870/488281 [03:10<1:18:16, 101.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13898/488281 [03:10<1:08:01, 116.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.83004760742188, 'l2_loss': 68.88003540039062, 'l1_loss': 61.95001220703125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 13926/488281 [03:10<1:03:01, 125.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13954/488281 [03:10<1:00:32, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13982/488281 [03:11<59:20, 133.22it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 13996/488281 [03:11<58:59, 134.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 23
acts.shape=torch.Size([4096, 4096])
{'loss': 130.80503845214844, 'l2_loss': 69.00882720947266, 'l1_loss': 61.796207427978516}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.904162883758545 4.308743476867676 8.875269889831543
91.86%
Reconstruction: (0.9186135819215174, 3.904162883758545, 4.308743476867676, 8.875269889831543)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0056, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  3%|▎         | 14010/488281 [03:15<13:45:46,  9.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14034/488281 [03:16<8:21:15, 15.77it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14062/488281 [03:16<4:28:25, 29.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14090/488281 [03:16<2:39:36, 49.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14118/488281 [03:16<1:47:31, 73.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.0538330078125, 'l2_loss': 68.86422729492188, 'l1_loss': 62.18960952758789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 14146/488281 [03:17<1:22:15, 96.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14174/488281 [03:17<1:09:57, 112.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14202/488281 [03:17<1:03:58, 123.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.68923950195312, 'l2_loss': 68.37531280517578, 'l1_loss': 62.313926696777344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14230/488281 [03:18<1:41:53, 77.54it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14258/488281 [03:18<1:19:34, 99.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14286/488281 [03:18<1:08:38, 115.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14314/488281 [03:18<1:03:19, 124.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.18714904785156, 'l2_loss': 68.44224548339844, 'l1_loss': 62.74489974975586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 14342/488281 [03:18<1:00:39, 130.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14370/488281 [03:19<59:23, 133.00it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14398/488281 [03:19<58:45, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14412/488281 [03:19<1:56:36, 67.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.244873046875, 'l2_loss': 68.58096313476562, 'l1_loss': 61.66391372680664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 14440/488281 [03:19<1:26:47, 91.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14468/488281 [03:20<1:12:09, 109.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14496/488281 [03:20<1:04:58, 121.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.23321533203125, 'l2_loss': 68.84336853027344, 'l1_loss': 62.38985061645508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 14524/488281 [03:20<1:01:29, 128.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14552/488281 [03:20<59:45, 132.13it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14580/488281 [03:20<58:55, 133.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14608/488281 [03:21<1:39:02, 79.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.0606689453125, 'l2_loss': 68.94287109375, 'l1_loss': 62.11780548095703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  3%|▎         | 14636/488281 [03:21<1:18:09, 101.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14664/488281 [03:21<1:07:55, 116.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14692/488281 [03:22<1:02:53, 125.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.06036376953125,

  3%|▎         | 14720/488281 [03:22<1:00:26, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14748/488281 [03:22<59:13, 133.25it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14776/488281 [03:22<58:46, 134.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 14804/488281 [03:23<1:39:03, 79.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.22946166992188, 'l2_loss': 68.48168182373047, 'l1_loss': 61.74777603149414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 14832/488281 [03:23<1:18:08, 100.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14860/488281 [03:23<1:07:54, 116.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14888/488281 [03:23<1:02:51, 125.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14916/488281 [03:24<1:00:25, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.36322021484375, 'l2_loss': 68.81964111328125, 'l1_loss': 61.543582916259766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  3%|▎         | 14944/488281 [03:24<59:13, 133.21it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 14972/488281 [03:24<58:37, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15000/488281 [03:25<1:38:50, 79.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.42254638671875, 'l2_loss': 68.67219543457031, 'l1_l


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0037, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  3%|▎         | 15026/488281 [03:29<10:28:34, 12.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15054/488281 [03:30<5:33:17, 23.66it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15082/488281 [03:30<3:11:54, 41.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15110/488281 [03:30<2:03:26, 63.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.6771240234375, 'l2_loss': 68.09375, 'l1_loss': 61.583377838134766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Siz

  3%|▎         | 15138/488281 [03:30<1:30:01, 87.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15166/488281 [03:30<1:13:41, 106.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15180/488281 [03:31<2:06:42, 62.23it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15208/488281 [03:31<1:31:42, 85.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.57293701171875, 'l2_loss': 68.6444320678711, 'l1_loss': 61.928504943847656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 15236/488281 [03:31<1:14:31, 105.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15264/488281 [03:31<1:06:21, 118.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15292/488281 [03:32<1:02:05, 126.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.716552734375, 'l2_loss': 68.34162902832031, 'l1_loss': 61.37491989135742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 15320/488281 [03:32<1:00:01, 131.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15348/488281 [03:32<59:00, 133.58it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15376/488281 [03:33<1:39:03, 79.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15404/488281 [03:33<1:18:09, 100.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.41030883789062, 'l2_loss': 68.46852111816406, 'l1_loss': 61.94178009033203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 15432/488281 [03:33<1:07:52, 116.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15460/488281 [03:33<1:02:49, 125.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15488/488281 [03:33<1:00:20, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15516/488281 [03:34<59:10, 133.14it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.92698669433594, 'l2_loss': 68.38175964355469, 'l1_loss': 61.545223236083984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  3%|▎         | 15544/488281 [03:34<58:33, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15572/488281 [03:34<1:38:41, 79.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15600/488281 [03:35<1:17:55, 101.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.45965576171875, 'l2_loss': 67.79478454589844, 'l1_loss': 61.66486358642578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 15628/488281 [03:35<1:07:46, 116.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15656/488281 [03:35<1:02:45, 125.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15684/488281 [03:35<1:00:19, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15712/488281 [03:35<59:08, 133.17it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.47195434570312, 'l2_loss': 67.87936401367188, 'l1_loss': 61.59259033203125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 15740/488281 [03:36<58:33, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15768/488281 [03:36<1:38:40, 79.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15796/488281 [03:36<1:18:03, 100.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15824/488281 [03:37<1:07:49, 116.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 131.0106964111328, 'l2_loss': 68.84738159179688, 'l1_loss': 62.1633186340332}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 15852/488281 [03:37<1:02:46, 125.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15880/488281 [03:37<1:00:18, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15908/488281 [03:37<59:08, 133.11it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.63748168945312, 'l2_loss': 68.22891998291016, 'l1_loss': 62.40855407714844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  3%|▎         | 15922/488281 [03:37<58:46, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15950/488281 [03:38<1:38:53, 79.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 15978/488281 [03:38<1:17:59, 100.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 15992/488281 [03:38<1:11:58, 109.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 24
acts.shape=torch.Size([4096, 4096])
{'loss': 130.2792510986328, 'l2_loss': 68.23944854736328, 'l1_loss': 62.03980255126953}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.883171796798706 4.2883100509643555 8.919275283813477
91.96%
Reconstruction: (0.919553230943274, 3.883171796798706, 4.2883100509643555, 8.919275283813477)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.36it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0024, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  3%|▎         | 16019/488281 [03:43<10:07:47, 12.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16047/488281 [03:43<5:25:16, 24.20it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16075/488281 [03:43<3:08:26, 41.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16103/488281 [03:43<2:01:47, 64.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.5045928955078, 'l2_loss': 68.12081146240234, 'l1_loss': 61.38378143310547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 16117/488281 [03:44<1:42:35, 76.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16145/488281 [03:44<2:00:14, 65.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16173/488281 [03:44<1:28:26, 88.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16201/488281 [03:45<1:12:53, 107.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.6158905029297, 'l2_loss': 68.2131118774414, 'l1_loss': 61.40278244018555}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 16229/488281 [03:45<1:05:15, 120.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16257/488281 [03:45<1:01:29, 127.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16285/488281 [03:45<1:00:14, 130.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16313/488281 [03:45<59:04, 133.16it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.34530639648438, 'l2_loss': 67.4218521118164, 'l1_loss': 61.92345428466797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 16341/488281 [03:46<1:38:56, 79.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16369/488281 [03:46<1:17:59, 100.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16397/488281 [03:46<1:07:44, 116.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.69166564941406,

  3%|▎         | 16425/488281 [03:47<1:02:43, 125.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16453/488281 [03:47<1:00:14, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16481/488281 [03:47<59:02, 133.20it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16509/488281 [03:47<58:27, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.89523315429688, 'l2_loss': 68.27389526367188, 'l1_loss': 61.621341705322266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16537/488281 [03:48<1:38:33, 79.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16565/488281 [03:48<1:17:47, 101.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16593/488281 [03:48<1:07:36, 116.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16621/488281 [03:48<1:02:39, 125.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.6742401123047, 'l2_loss': 68.26077270507812, 'l1_loss': 61.4134635925293}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  3%|▎         | 16649/488281 [03:49<1:00:11, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16677/488281 [03:49<58:59, 133.23it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16691/488281 [03:49<58:39, 134.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.63055419921875, 'l2_loss': 67.89314270019531, 'l1_loss': 61.73741912841797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16719/488281 [03:49<1:38:38, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16747/488281 [03:50<1:17:49, 100.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16775/488281 [03:50<1:07:37, 116.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16803/488281 [03:50<1:03:06, 124.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.14599609375, 'l2_loss': 68.28716278076172, 'l1_loss': 61.85882568359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  3%|▎         | 16831/488281 [03:50<1:00:24, 130.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16859/488281 [03:50<59:05, 132.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16887/488281 [03:51<58:27, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 16915/488281 [03:51<1:38:35, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.2991485595703, 'l2_loss': 68.14230346679688, 'l1_loss': 61.15684127807617}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  3%|▎         | 16943/488281 [03:51<1:17:47, 100.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16971/488281 [03:52<1:07:36, 116.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 16999/488281 [03:52<1:02:35, 125.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.52960205078125, 'l2_loss': 68.3951416015625, 'l1_loss': 62.13446044921875}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 16.50it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.93it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.85it/s]
  3%|▎         | 17013/488281 [03:56<13:45:50,  9.51it/s]

Num dead tensor(0.0024, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  3%|▎         | 17041/488281 [03:57<7:14:06, 18.09it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 17069/488281 [03:57<4:02:10, 32.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  3%|▎         | 17083/488281 [03:57<3:06:50, 42.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17110/488281 [03:57<2:42:47, 48.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.83119201660156, 'l2_loss': 68.14591979980469, 'l1_loss': 61.685272216796875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▎         | 17138/488281 [03:58<1:48:49, 72.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17166/488281 [03:58<1:22:40, 94.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17194/488281 [03:58<1:09:56, 112.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17222/488281 [03:58<1:03:44, 123.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.54412841796875, 'l2_loss': 67.78353118896484, 'l1_loss': 61.760589599609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▎         | 17250/488281 [03:59<1:00:41, 129.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17278/488281 [03:59<59:12, 132.59it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17292/488281 [03:59<1:56:27, 67.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.95074462890625, 'l2_loss': 68.15689849853516, 'l1_loss': 61.79384994506836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▎         | 17320/488281 [03:59<1:26:43, 90.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17348/488281 [04:00<1:11:56, 109.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17376/488281 [04:00<1:04:42, 121.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17404/488281 [04:00<1:01:12, 128.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.2958221435547, 'l2_loss': 67.87672424316406, 'l1_loss': 61.419097900390625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▎         | 17432/488281 [04:00<59:26, 132.02it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17460/488281 [04:00<58:35, 133.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17488/488281 [04:01<1:38:34, 79.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17516/488281 [04:01<1:17:46, 100.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.15586853027344, 'l2_loss': 67.91407012939453, 'l1_loss': 61.241798400878906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▎         | 17544/488281 [04:01<1:07:33, 116.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17572/488281 [04:02<1:02:32, 125.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17600/488281 [04:02<1:00:04, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.1403045654297, 'l2_loss': 68.39724731445312, 'l1_loss': 61.7430534362793}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  4%|▎         | 17628/488281 [04:02<58:54, 133.15it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17656/488281 [04:02<58:18, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17684/488281 [04:03<1:38:19, 79.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17712/488281 [04:03<1:17:39, 100.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.6112823486328, 'l2_loss': 67.63443756103516, 'l1_loss': 60.97684860229492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▎         | 17740/488281 [04:03<1:07:29, 116.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17768/488281 [04:03<1:02:30, 125.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17796/488281 [04:04<1:00:03, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.81858825683594,

  4%|▎         | 17824/488281 [04:04<59:05, 132.70it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17852/488281 [04:04<58:23, 134.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17880/488281 [04:05<1:38:55, 79.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 17908/488281 [04:05<1:17:56, 100.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.34628295898438, 'l2_loss': 67.8509750366211, 'l1_loss': 61.49531173706055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▎         | 17936/488281 [04:05<1:07:38, 115.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17964/488281 [04:05<1:02:33, 125.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 17992/488281 [04:05<1:00:04, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0022, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  4%|▎         | 18020/488281 [04:10<9:50:49, 13.27it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 18034/488281 [04:10<7:10:52, 18.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 18062/488281 [04:11<4:40:38, 27.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 18090/488281 [04:11<2:46:54, 46.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.80825805664062,

  4%|▎         | 18118/488281 [04:11<1:51:13, 70.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 18146/488281 [04:11<1:23:54, 93.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 18174/488281 [04:12<1:10:32, 111.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▎         | 18202/488281 [04:12<1:04:00, 122.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.596923828125, 'l2_loss': 67.57533264160156, 'l1_loss': 61.0215950012207}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  4%|▎         | 18230/488281 [04:12<1:00:47, 128.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 18258/488281 [04:13<1:39:29, 78.74it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▎         | 18286/488281 [04:13<1:18:09, 100.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18314/488281 [04:13<1:08:07, 114.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 130.19203186035156, 'l2_loss': 68.25267028808594, 'l1_loss': 61.939361572265625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 18342/488281 [04:13<1:02:48, 124.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18370/488281 [04:13<1:00:10, 130.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18398/488281 [04:14<58:53, 132.99it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.3523712158203, 'l2_loss': 67.52870178222656, 'l1_loss': 60.823673248291016}
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 18426/488281 [04:14<58:17, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 18454/488281 [04:14<1:38:14, 79.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 18482/488281 [04:15<1:17:32, 100.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18510/488281 [04:15<1:07:26, 116.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.04071044921875, 'l2_loss': 67.75091552734375, 'l1_loss': 61.289791107177734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 18538/488281 [04:15<1:02:25, 125.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18566/488281 [04:15<1:00:01, 130.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18594/488281 [04:15<58:48, 133.12it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18622/488281 [04:16<58:14, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.1403350830078, 'l2_loss': 67.77783966064453, 'l1_loss': 61.362491607666016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 18636/488281 [04:16<1:55:30, 67.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 18664/488281 [04:16<1:25:59, 91.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18692/488281 [04:16<1:11:30, 109.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.81349182128906, 'l2_loss': 68.11163330078125, 'l1_loss': 60.70185470581055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 18720/488281 [04:17<1:04:25, 121.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18748/488281 [04:17<1:00:56, 128.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18776/488281 [04:17<59:14, 132.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18804/488281 [04:17<58:25, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.64645385742188, 'l2_loss': 68.08406066894531, 'l1_loss': 61.56239318847656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 18832/488281 [04:18<1:38:13, 79.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 18860/488281 [04:18<1:17:42, 100.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18888/488281 [04:18<1:07:26, 116.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18916/488281 [04:18<1:02:26, 125.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.53746032714844, 'l2_loss': 68.2756118774414, 'l1_loss': 61.26184844970703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▍         | 18944/488281 [04:19<59:57, 130.45it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 18972/488281 [04:19<58:45, 133.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19000/488281 [04:19<58:11, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.9114990234375, 'l2_loss': 67.84002685546875, 'l1_loss': 61.071468353271484}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8965516090393066 4.305104


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.33it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0015, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])





acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19028/488281 [04:24<10:27:07, 12.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19056/488281 [04:24<5:36:37, 23.23it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19084/488281 [04:24<3:14:17, 40.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19112/488281 [04:25<2:04:35, 62.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.96499633789062, 'l2_loss': 67.8700942993164, 'l1_loss': 61.09490966796875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▍         | 19140/488281 [04:25<1:30:23, 86.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19168/488281 [04:25<1:13:39, 106.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19196/488281 [04:25<1:05:26, 119.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19224/488281 [04:26<1:41:37, 76.93it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.421142578125, 'l2_loss': 68.34568786621094, 'l1_loss': 61.07544708251953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  4%|▍         | 19252/488281 [04:26<1:19:08, 98.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19280/488281 [04:26<1:08:07, 114.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19308/488281 [04:26<1:02:46, 124.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.3455810546875, 'l2_loss': 67.2728271484375, 'l1_loss': 61.072757720947266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▍         | 19336/488281 [04:27<1:00:32, 129.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19364/488281 [04:27<59:00, 132.43it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19378/488281 [04:27<58:34, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19406/488281 [04:28<1:38:19, 79.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.05979919433594, 'l2_loss': 68.09494018554688, 'l1_loss': 60.96485900878906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 19434/488281 [04:28<1:17:30, 100.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19462/488281 [04:28<1:07:18, 116.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19490/488281 [04:28<1:02:18, 125.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.66058349609375,

  4%|▍         | 19518/488281 [04:28<59:52, 130.48it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19546/488281 [04:29<58:40, 133.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19574/488281 [04:29<58:05, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19602/488281 [04:29<1:37:56, 79.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.40768432617188, 'l2_loss': 67.7779312133789, 'l1_loss': 60.6297492980957}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  4%|▍         | 19630/488281 [04:30<1:17:19, 101.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19658/488281 [04:30<1:07:14, 116.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19686/488281 [04:30<1:02:15, 125.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19714/488281 [04:30<59:51, 130.46it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.63157653808594, 'l2_loss': 67.67677307128906, 'l1_loss': 60.95479965209961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 19742/488281 [04:30<58:38, 133.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19770/488281 [04:31<58:03, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19798/488281 [04:31<1:37:53, 79.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.68460083007812, 'l2_loss': 68.33074188232422, 'l1_l

  4%|▍         | 19826/488281 [04:31<1:17:17, 101.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19854/488281 [04:32<1:07:10, 116.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19882/488281 [04:32<1:02:28, 124.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19910/488281 [04:32<59:56, 130.24it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.45687866210938, 'l2_loss': 67.57200622558594, 'l1_loss': 60.88486862182617}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 19938/488281 [04:32<58:40, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 19966/488281 [04:32<58:03, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19980/488281 [04:33<1:55:16, 67.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 19994/488281 [04:33<1:37:56, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 26
acts.shape=torch.Size([4096, 4096])
{'loss': 129.3941650390625, 'l2_loss': 67.76728057861328, 'l1_loss': 61.626888275146484}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.894390821456909 4.2898688316345215 8.986764907836914
92.23%
Reconstruction: (0.922339167651616, 3.894390821456909, 4.2898688316345215, 8.986764907836914)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0010, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  4%|▍         | 20020/488281 [04:38<10:23:28, 12.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20048/488281 [04:38<5:30:29, 23.61it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20076/488281 [04:38<3:10:12, 41.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20104/488281 [04:38<2:02:15, 63.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.85580444335938, 'l2_loss': 67.55867767333984, 'l1_loss': 61.29713439941406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 20132/488281 [04:38<1:29:05, 87.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20146/488281 [04:39<1:19:33, 98.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20174/488281 [04:39<1:48:15, 72.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20202/488281 [04:39<1:22:18, 94.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.90516662597656, 'l2_loss': 67.75102233886719, 'l1_loss': 61.15414810180664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 20230/488281 [04:40<1:09:32, 112.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20258/488281 [04:40<1:03:17, 123.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20286/488281 [04:40<1:00:14, 129.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20314/488281 [04:40<58:46, 132.71it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.54635620117188, 'l2_loss': 67.3424072265625, 'l1_loss': 61.203941345214844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 20342/488281 [04:40<58:01, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20370/488281 [04:41<1:37:59, 79.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20398/488281 [04:41<1:17:13, 100.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.84539794921875, 'l2_loss': 67.75041961669922, 'l1_loss': 61.094970703125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  4%|▍         | 20426/488281 [04:41<1:07:04, 116.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20454/488281 [04:42<1:02:04, 125.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20482/488281 [04:42<59:37, 130.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20510/488281 [04:42<58:28, 133.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.6437225341797, 'l2_loss': 67.70428466796875, 'l1_loss': 60.93943405151367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▍         | 20538/488281 [04:42<57:51, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20566/488281 [04:43<1:37:52, 79.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20594/488281 [04:43<1:17:09, 101.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20622/488281 [04:43<1:07:01, 116.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.733154296875, 'l2_loss': 67.7220458984375, 'l1_loss': 61.011112213134766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  4%|▍         | 20650/488281 [04:43<1:02:02, 125.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20678/488281 [04:43<59:35, 130.77it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20706/488281 [04:44<58:25, 133.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.05775451660156, 'l2_loss': 67.29788970947266, 'l1_loss': 60.759864807128906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 20734/488281 [04:44<57:49, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20748/488281 [04:44<1:54:47, 67.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20776/488281 [04:45<1:25:27, 91.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20804/488281 [04:45<1:11:05, 109.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.15444946289062, 'l2_loss': 67.3333740234375, 'l1_loss': 60.82107925415039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▍         | 20832/488281 [04:45<1:04:01, 121.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20860/488281 [04:45<1:00:33, 128.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 20888/488281 [04:45<59:02, 131.93it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.82160186767578,

  4%|▍         | 20916/488281 [04:46<58:08, 133.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20944/488281 [04:46<1:37:50, 79.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 20972/488281 [04:46<1:17:06, 101.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21000/488281 [04:47<1:06:57, 116.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.3828887939453, 'l2_loss': 67.55178833007812, 'l1_loss': 60.83110427856445}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.87109112739563 4.274569511


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.32it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0010, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  4%|▍         | 21028/488281 [04:51<9:48:21, 13.24it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21056/488281 [04:51<5:17:28, 24.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21084/488281 [04:52<3:04:43, 42.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21112/488281 [04:52<1:59:44, 65.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.29067993164062, 'l2_loss': 66.677490234375, 'l1_loss': 60.613189697265625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  4%|▍         | 21140/488281 [04:52<2:07:42, 60.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21168/488281 [04:53<1:31:44, 84.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21196/488281 [04:53<1:14:06, 105.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.67977905273438,

  4%|▍         | 21224/488281 [04:53<1:05:29, 118.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21252/488281 [04:53<1:01:14, 127.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21280/488281 [04:53<59:09, 131.57it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21308/488281 [04:54<58:10, 133.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.76174926757812, 'l2_loss': 67.63873291015625, 'l1_loss': 61.123016357421875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21336/488281 [04:54<1:37:42, 79.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21364/488281 [04:54<1:17:01, 101.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21392/488281 [04:55<1:07:07, 115.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21420/488281 [04:55<1:02:03, 125.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.48025512695312, 'l2_loss': 67.74984741210938, 'l1_loss': 60.730403900146484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 21448/488281 [04:55<59:32, 130.67it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21476/488281 [04:55<58:19, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21490/488281 [04:55<57:58, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.54481506347656, 'l2_loss': 67.16384887695312, 'l1_loss': 60.3809700012207}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21518/488281 [04:56<1:37:42, 79.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21546/488281 [04:56<1:17:00, 101.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21574/488281 [04:56<1:06:51, 116.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21602/488281 [04:57<1:01:55, 125.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.38096618652344, 'l2_loss': 67.74098205566406, 'l1_loss': 61.639984130859375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 21630/488281 [04:57<59:28, 130.79it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21658/488281 [04:57<58:15, 133.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21686/488281 [04:57<57:41, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21714/488281 [04:58<1:37:23, 79.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.84217071533203, 'l2_loss': 67.16307067871094, 'l1_loss': 60.679100036621094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 21742/488281 [04:58<1:16:50, 101.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21770/488281 [04:58<1:06:46, 116.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21798/488281 [04:58<1:01:49, 125.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.5233154296875, 'l2_loss': 67.70195007324219, 'l1_loss': 60.821372985839844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  4%|▍         | 21826/488281 [04:58<59:26, 130.80it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21854/488281 [04:59<58:14, 133.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  4%|▍         | 21882/488281 [04:59<57:39, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21910/488281 [04:59<1:37:47, 79.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.05958557128906, 'l2_loss': 67.79424285888672, 'l1_loss': 60.265342712402344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  4%|▍         | 21938/488281 [05:00<1:17:01, 100.90it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  4%|▍         | 21966/488281 [05:00<1:06:51, 116.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 21994/488281 [05:00<1:01:52, 125.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.29it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0007, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  5%|▍         | 22022/488281 [05:05<9:46:30, 13.25it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22050/488281 [05:05<5:16:29, 24.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22078/488281 [05:05<3:04:11, 42.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22105/488281 [05:06<2:40:41, 48.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.59727478027344, 'l2_loss': 67.04731750488281, 'l1_loss': 60.54995346069336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▍         | 22133/488281 [05:06<1:47:26, 72.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22161/488281 [05:06<1:21:39, 95.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22189/488281 [05:06<1:09:05, 112.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22217/488281 [05:07<1:02:57, 123.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.0517807006836, 'l2_loss': 67.43206024169922, 'l1_loss': 59.619720458984375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▍         | 22245/488281 [05:07<59:57, 129.56it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22259/488281 [05:07<59:04, 131.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22287/488281 [05:07<1:37:55, 79.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22315/488281 [05:08<1:17:06, 100.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.94502258300781, 'l2_loss': 67.49406433105469, 'l1_loss': 60.450958251953125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▍         | 22343/488281 [05:08<1:06:51, 116.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22371/488281 [05:08<1:01:50, 125.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22399/488281 [05:08<59:41, 130.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.37078857421875, 'l2_loss': 68.08505249023438, 'l1_loss': 60.285743713378906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▍         | 22427/488281 [05:08<58:21, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22455/488281 [05:09<57:41, 134.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22483/488281 [05:09<1:37:16, 79.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22511/488281 [05:09<1:16:47, 101.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.1805419921875, 'l2_loss': 67.3946762084961, 'l1_loss': 59.78586959838867}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  5%|▍         | 22539/488281 [05:10<1:06:42, 116.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22567/488281 [05:10<1:01:45, 125.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22595/488281 [05:10<59:19, 130.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.93681335449219,

  5%|▍         | 22623/488281 [05:10<58:10, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22651/488281 [05:10<57:35, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22679/488281 [05:11<1:37:06, 79.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22707/488281 [05:11<1:16:41, 101.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.34011840820312, 'l2_loss': 67.3014144897461, 'l1_loss': 61.0386962890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  5%|▍         | 22735/488281 [05:11<1:06:38, 116.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22763/488281 [05:12<1:01:42, 125.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22791/488281 [05:12<59:17, 130.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22819/488281 [05:12<58:08, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.71127319335938, 'l2_loss': 67.56307983398438, 'l1_loss': 61.14820098876953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▍         | 22847/488281 [05:12<57:33, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22861/488281 [05:13<1:54:16, 67.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22889/488281 [05:13<1:25:04, 91.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.70179748535156,

  5%|▍         | 22917/488281 [05:13<1:10:45, 109.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 22945/488281 [05:13<1:03:59, 121.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22973/488281 [05:14<1:00:24, 128.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 22987/488281 [05:14<59:22, 130.62it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.91156005859375, 'l2_loss': 67.0429458618164, 'l1_loss': 60.868614196777344}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.866217851638794 4.278775215148926 8.934155464172363
91.86%
Reconstruction: (0.9185946246674719, 3.866217851638794, 4.278775215148926,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A
  5%|▍         | 23001/488281 [05:18<13:27:09,  9.61it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0015, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23029/488281 [05:18<7:04:32, 18.26it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23055/488281 [05:19<4:43:31, 27.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23083/488281 [05:19<2:46:14, 46.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23111/488281 [05:19<1:50:07, 70.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.37631225585938, 'l2_loss': 67.72955322265625, 'l1_loss': 60.646766662597656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▍         | 23139/488281 [05:20<1:22:54, 93.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23167/488281 [05:20<1:09:38, 111.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23195/488281 [05:20<1:03:10, 122.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.69337463378906, 'l2_loss': 67.90757751464844, 'l1_loss': 60.78580093383789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▍         | 23223/488281 [05:20<1:00:01, 129.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23251/488281 [05:21<1:38:16, 78.87it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23279/488281 [05:21<1:17:11, 100.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23307/488281 [05:21<1:06:53, 115.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.9747314453125, 'l2_loss': 68.04396057128906, 'l1_loss': 60.93076705932617}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▍         | 23335/488281 [05:21<1:01:47, 125.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23363/488281 [05:22<59:18, 130.64it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23391/488281 [05:22<58:05, 133.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23419/488281 [05:22<57:48, 134.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.1844482421875, 'l2_loss': 67.56757354736328, 'l1_loss': 60.616878509521484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23447/488281 [05:23<1:37:13, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23475/488281 [05:23<1:16:40, 101.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23503/488281 [05:23<1:06:37, 116.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.89520263671875, 'l2_loss': 67.54011535644531, 'l1_loss': 60.3550910949707}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▍         | 23531/488281 [05:23<1:01:40, 125.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23559/488281 [05:23<59:13, 130.78it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23587/488281 [05:24<58:02, 133.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23615/488281 [05:24<57:29, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.18138122558594, 'l2_loss': 67.45693969726562, 'l1_loss': 60.72443771362305}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23629/488281 [05:24<1:54:06, 67.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23657/488281 [05:24<1:24:56, 91.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23685/488281 [05:25<1:10:38, 109.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23713/488281 [05:25<1:03:39, 121.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.67439270019531, 'l2_loss': 67.26175689697266, 'l1_loss': 60.412635803222656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▍         | 23741/488281 [05:25<1:00:11, 128.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23769/488281 [05:25<58:30, 132.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23797/488281 [05:25<57:41, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.0082244873047, 'l2_loss': 67.39923095703125, 'l1_loss': 60.60899353027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23825/488281 [05:26<1:37:01, 79.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 23853/488281 [05:26<1:16:32, 101.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23881/488281 [05:26<1:06:30, 116.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23909/488281 [05:27<1:01:36, 125.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.0206756591797, 'l2_loss': 67.01895141601562, 'l1_loss': 61.00172805786133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▍         | 23937/488281 [05:27<59:11, 130.74it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23965/488281 [05:27<58:13, 132.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 23993/488281 [05:27<57:31, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 28
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.21it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0010, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  5%|▍         | 24021/488281 [05:32<10:22:21, 12.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24049/488281 [05:33<5:33:56, 23.17it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24077/488281 [05:33<3:12:36, 40.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24105/488281 [05:33<2:03:23, 62.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.37405395507812, 'l2_loss': 68.35926055908203, 'l1_loss': 61.014793395996094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▍         | 24133/488281 [05:33<1:29:27, 86.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24161/488281 [05:33<1:12:48, 106.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24189/488281 [05:34<1:04:39, 119.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 24217/488281 [05:34<1:40:20, 77.08it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.4320068359375, 'l2_loss': 67.65611267089844, 'l1_loss': 60.7758903503418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  5%|▍         | 24245/488281 [05:34<1:18:08, 98.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24273/488281 [05:34<1:07:15, 114.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24301/488281 [05:35<1:01:57, 124.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.50537109375, 'l

  5%|▍         | 24329/488281 [05:35<59:20, 130.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24357/488281 [05:35<58:03, 133.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▍         | 24371/488281 [05:35<57:40, 134.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▍         | 24399/488281 [05:36<1:36:57, 79.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.75630950927734, 'l2_loss': 67.38591003417969, 'l1_loss': 60.370399475097656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▌         | 24427/488281 [05:36<1:16:30, 101.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24455/488281 [05:36<1:06:44, 115.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24483/488281 [05:36<1:01:39, 125.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24511/488281 [05:37<59:12, 130.55it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.17735290527344, 'l2_loss': 67.68045806884766, 'l1_loss': 60.49689483642578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 24539/488281 [05:37<57:58, 133.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24567/488281 [05:37<57:22, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 24595/488281 [05:38<1:37:22, 79.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.7315216064453, 'l2_loss': 67.94367980957031, 'l1_lo

  5%|▌         | 24623/488281 [05:38<1:16:41, 100.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24651/488281 [05:38<1:06:31, 116.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24679/488281 [05:38<1:01:32, 125.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24707/488281 [05:38<59:08, 130.66it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.64216613769531, 'l2_loss': 66.99006652832031, 'l1_loss': 60.652099609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  5%|▌         | 24735/488281 [05:39<57:55, 133.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24763/488281 [05:39<57:20, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 24791/488281 [05:39<1:36:42, 79.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 24819/488281 [05:40<1:16:21, 101.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.16017150878906, 'l2_loss': 67.85733032226562, 'l1_loss': 60.3028450012207}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▌         | 24847/488281 [05:40<1:06:21, 116.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24875/488281 [05:40<1:01:26, 125.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24903/488281 [05:40<59:04, 130.72it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.77871704101562, 'l2_loss': 67.23480224609375, 'l1_loss': 59.54391098022461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 24931/488281 [05:40<57:53, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 24959/488281 [05:41<57:18, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 24973/488281 [05:41<1:54:05, 67.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 24987/488281 [05:41<1:36:54, 79.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.55157470703125, 'l2_loss': 67.19342041015625, 'l1_loss': 60.358150482177734}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.910937547683716 4.306679725646973 8.965209007263184
92.17%
Reconstruction: (0.9217014398359632, 3.910937547683716, 4.306679725646973, 8.965209007263184)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.06it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.52it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


  5%|▌         | 25001/488281 [05:46<13:52:36,  9.27it/s]

tensor(0.0007, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25015/488281 [05:46<9:59:49, 12.87it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25029/488281 [05:46<7:16:53, 17.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25043/488281 [05:46<5:22:50, 23.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25057/488281 [05:46<4:03:00, 31.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25071/488281 [05:46<3:07:08, 41.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25085/488281 [05:46<2:28:01, 52.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25099/488281 [05:46<2:00:38, 63.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.47677612304688, 'l2_loss': 66.78465270996094, 'l1_loss': 59.6921272277832}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▌         | 25113/488281 [05:47<1:41:30, 76.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25127/488281 [05:47<1:28:04, 87.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25141/488281 [05:47<1:18:40, 98.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25169/488281 [05:47<1:47:01, 72.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25197/488281 [05:48<1:21:22, 94.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.54945373535156, 'l2_loss': 67.34829711914062, 'l1_loss': 60.2011604309082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▌         | 25225/488281 [05:48<1:08:49, 112.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25253/488281 [05:48<1:02:38, 123.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25281/488281 [05:48<59:36, 129.44it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25309/488281 [05:48<58:10, 132.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.84327697753906, 'l2_loss': 66.91545867919922, 'l1_loss': 59.92781448364258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 25337/488281 [05:49<57:25, 134.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25365/488281 [05:49<1:36:39, 79.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25393/488281 [05:49<1:16:16, 101.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25421/488281 [05:50<1:06:18, 116.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.31466674804688, 'l2_loss': 67.12188720703125, 'l1_loss': 60.19277572631836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 25449/488281 [05:50<1:01:24, 125.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25477/488281 [05:50<59:15, 130.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25505/488281 [05:50<57:59, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.54734802246094, 'l2_loss': 67.01492309570312, 'l1_loss': 60.53242492675781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 25533/488281 [05:50<57:20, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25561/488281 [05:51<1:36:42, 79.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25589/488281 [05:51<1:16:17, 101.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25617/488281 [05:51<1:06:18, 116.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.95244598388672, 'l2_loss': 67.69849395751953, 'l1_loss': 60.25395202636719}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 25645/488281 [05:52<1:01:24, 125.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25673/488281 [05:52<58:59, 130.70it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25701/488281 [05:52<57:50, 133.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.54729461669922,

  5%|▌         | 25715/488281 [05:52<57:30, 134.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25743/488281 [05:53<1:36:41, 79.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25771/488281 [05:53<1:16:16, 101.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25799/488281 [05:53<1:06:16, 116.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.6275634765625, 'l2_loss': 67.46847534179688, 'l1_loss': 60.15908432006836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▌         | 25827/488281 [05:53<1:01:22, 125.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25855/488281 [05:53<58:57, 130.71it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25883/488281 [05:54<57:47, 133.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25911/488281 [05:54<57:14, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.83317565917969, 'l2_loss': 67.46572875976562, 'l1_loss': 60.36745071411133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25939/488281 [05:54<1:36:32, 79.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 25967/488281 [05:55<1:16:11, 101.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 25995/488281 [05:55<1:06:29, 115.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 29
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.22it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.52it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0007, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  5%|▌         | 26023/488281 [06:00<9:44:58, 13.17it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26051/488281 [06:00<5:15:29, 24.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26079/488281 [06:00<3:03:25, 42.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26107/488281 [06:00<1:58:45, 64.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.84447479248047, 'l2_loss': 67.2978515625, 'l1_loss': 60.54662322998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 26135/488281 [06:01<2:06:28, 60.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 26163/488281 [06:01<1:30:49, 84.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26191/488281 [06:01<1:13:20, 105.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26219/488281 [06:01<1:04:48, 118.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.98908996582031, 'l2_loss': 67.49236297607422, 'l1_loss': 60.49672317504883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  5%|▌         | 26247/488281 [06:01<1:00:35, 127.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26275/488281 [06:02<58:33, 131.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26303/488281 [06:02<57:34, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.86563110351562, 'l2_loss': 67.56643676757812, 'l1_loss': 60.299198150634766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 26317/488281 [06:02<1:53:52, 67.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 26345/488281 [06:03<1:24:38, 90.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26373/488281 [06:03<1:10:18, 109.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26401/488281 [06:03<1:03:18, 121.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.44151306152344, 'l2_loss': 67.44108581542969, 'l1_loss': 61.000423431396484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▌         | 26429/488281 [06:03<59:50, 128.63it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26457/488281 [06:03<58:09, 132.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26485/488281 [06:04<57:46, 133.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 26513/488281 [06:04<1:36:45, 79.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 129.17807006835938, 'l2_loss': 68.1068344116211, 'l1_loss': 61.07124328613281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▌         | 26541/488281 [06:04<1:16:14, 100.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26569/488281 [06:05<1:06:10, 116.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26597/488281 [06:05<1:01:14, 125.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.44227600097656, 'l2_loss': 67.88200378417969, 'l1_loss': 60.560272216796875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▌         | 26625/488281 [06:05<58:51, 130.74it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26653/488281 [06:05<57:39, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26681/488281 [06:05<57:04, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  5%|▌         | 26709/488281 [06:06<1:36:24, 79.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.6907958984375, 'l2_loss': 68.06859588623047, 'l1_loss': 60.62220764160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  5%|▌         | 26737/488281 [06:06<1:16:03, 101.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26765/488281 [06:06<1:06:05, 116.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26793/488281 [06:07<1:01:11, 125.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  5%|▌         | 26821/488281 [06:07<58:48, 130.78it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.16323852539062, 'l2_loss': 67.15933990478516, 'l1_loss': 61.003902435302734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  5%|▌         | 26849/488281 [06:07<57:38, 133.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 26877/488281 [06:07<57:02, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 26905/488281 [06:08<1:36:22, 79.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.64054870605469, 'l2_loss': 67.6849594116211, 'l1_loss': 59.95558547973633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 26933/488281 [06:08<1:16:01, 101.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 26961/488281 [06:08<1:06:02, 116.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 26989/488281 [06:08<1:01:09, 125.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0005, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  6%|▌         | 27017/488281 [06:13<9:38:16, 13.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27045/488281 [06:13<5:12:09, 24.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27059/488281 [06:13<3:55:26, 32.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27086/488281 [06:14<3:05:27, 41.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27114/488281 [06:14<1:59:12, 64.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.56687927246094, 'l2_loss': 67.31322479248047, 'l1_loss': 60.25365447998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 27142/488281 [06:14<1:27:05, 88.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27170/488281 [06:14<1:11:26, 107.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27198/488281 [06:15<1:03:47, 120.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.3444061279297, 'l2_loss': 67.47120666503906, 'l1_loss': 60.87320327758789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 27226/488281 [06:15<1:00:04, 127.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27254/488281 [06:15<58:14, 131.95it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27282/488281 [06:16<1:36:47, 79.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27310/488281 [06:16<1:16:14, 100.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.26646423339844, 'l2_loss': 67.56890869140625, 'l1_loss': 60.69755935668945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 27338/488281 [06:16<1:06:09, 116.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27366/488281 [06:16<1:01:11, 125.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27394/488281 [06:16<58:45, 130.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.06401824951172,

  6%|▌         | 27422/488281 [06:17<57:35, 133.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27450/488281 [06:17<57:00, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27478/488281 [06:17<1:36:10, 79.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27506/488281 [06:18<1:16:26, 100.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.8375244140625, 'l2_loss': 67.69864654541016, 'l1_loss': 60.13887405395508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 27534/488281 [06:18<1:06:13, 115.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27562/488281 [06:18<1:01:11, 125.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27590/488281 [06:18<58:45, 130.67it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27618/488281 [06:18<57:34, 133.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.7709732055664, 'l2_loss': 66.85929107666016, 'l1_loss': 59.91168212890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 27646/488281 [06:19<56:59, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27660/488281 [06:19<1:53:15, 67.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27688/488281 [06:19<1:24:15, 91.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.2032012939453, 

  6%|▌         | 27716/488281 [06:20<1:10:03, 109.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27744/488281 [06:20<1:03:05, 121.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27772/488281 [06:20<59:40, 128.60it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27800/488281 [06:20<57:59, 132.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.07102966308594, 'l2_loss': 67.25128173828125, 'l1_loss': 59.81974792480469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 27828/488281 [06:20<57:12, 134.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27856/488281 [06:21<1:36:14, 79.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 27884/488281 [06:21<1:15:54, 101.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27912/488281 [06:21<1:05:58, 116.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.25198364257812, 'l2_loss': 67.6490478515625, 'l1_loss': 59.602935791015625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 27940/488281 [06:22<1:01:04, 125.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27968/488281 [06:22<58:40, 130.77it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 27996/488281 [06:22<57:29, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 30
acts.shape=torch.Size([4096, 4096])
{'loss': 128.03680419921875, 'l2_loss': 67.41876220703125, 'l1_loss': 60.618045806884766}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size(


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.98it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.72it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0005, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  6%|▌         | 28024/488281 [06:27<9:37:00, 13.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28049/488281 [06:27<6:04:38, 21.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28077/488281 [06:27<3:23:51, 37.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28105/488281 [06:28<2:07:49, 60.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.08685302734375, 'l2_loss': 67.74275207519531, 'l1_loss': 60.344093322753906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  6%|▌         | 28133/488281 [06:28<1:31:09, 84.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28161/488281 [06:28<1:13:20, 104.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28189/488281 [06:28<1:04:39, 118.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28217/488281 [06:28<1:00:25, 126.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.01605224609375, 'l2_loss': 66.88900756835938, 'l1_loss': 60.127044677734375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28245/488281 [06:29<1:37:43, 78.46it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28273/488281 [06:29<1:16:35, 100.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28301/488281 [06:29<1:06:17, 115.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.81478881835938, 'l2_loss': 66.88712310791016, 'l1_loss': 59.92766571044922}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 28329/488281 [06:30<1:01:11, 125.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28357/488281 [06:30<59:07, 129.65it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28385/488281 [06:30<57:53, 132.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28413/488281 [06:30<57:06, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.05172729492188, 'l2_loss': 67.04316711425781, 'l1_loss': 60.0085563659668}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 28441/488281 [06:31<1:36:02, 79.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28469/488281 [06:31<1:15:46, 101.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28497/488281 [06:31<1:05:50, 116.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28525/488281 [06:31<1:01:11, 125.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.56352233886719, 'l2_loss': 67.32255554199219, 'l1_loss': 60.240966796875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  6%|▌         | 28553/488281 [06:32<58:41, 130.55it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28581/488281 [06:32<57:28, 133.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28595/488281 [06:32<57:06, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.28746795654297, 'l2_loss': 67.23472595214844, 'l1_loss': 60.05274200439453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28623/488281 [06:32<1:36:07, 79.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28651/488281 [06:33<1:15:48, 101.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28679/488281 [06:33<1:05:50, 116.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28707/488281 [06:33<1:00:59, 125.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.03448486328125, 'l2_loss': 67.61284637451172, 'l1_loss': 60.42163848876953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 28735/488281 [06:33<58:34, 130.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28763/488281 [06:33<57:23, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28791/488281 [06:34<56:49, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 28819/488281 [06:34<1:35:53, 79.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.74513244628906, 'l2_loss': 67.615966796875, 'l1_loss': 60.12916564941406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  6%|▌         | 28847/488281 [06:34<1:15:40, 101.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28875/488281 [06:35<1:05:45, 116.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28903/488281 [06:35<1:00:56, 125.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.24995422363281, 'l2_loss': 67.38336181640625, 'l1_loss': 59.86659622192383}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 28931/488281 [06:35<58:32, 130.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28959/488281 [06:35<57:22, 133.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 28987/488281 [06:35<56:48, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A
  6%|▌         | 29001/488281 [06:40<14:11:29,  8.99it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29029/488281 [06:41<7:26:13, 17.15it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29057/488281 [06:41<4:07:19, 30.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29085/488281 [06:41<2:29:52, 51.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29113/488281 [06:41<1:42:08, 74.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.33299255371094, 'l2_loss': 67.04305267333984, 'l1_loss': 60.28994369506836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 29141/488281 [06:41<1:18:42, 97.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29169/488281 [06:42<1:07:14, 113.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29183/488281 [06:42<1:03:56, 119.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29197/488281 [06:42<1:57:45, 64.97it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.5053939819336, 'l2_loss': 67.61215209960938, 'l1_loss': 59.89324188232422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 29225/488281 [06:42<1:26:23, 88.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29253/488281 [06:43<1:10:59, 107.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29281/488281 [06:43<1:03:26, 120.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29309/488281 [06:43<59:45, 127.99it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.41696166992188, 'l2_loss': 67.44120788574219, 'l1_loss': 59.97575759887695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 29337/488281 [06:43<57:56, 132.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29365/488281 [06:43<57:02, 134.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29393/488281 [06:44<1:35:53, 79.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.54581451416016, 'l2_loss': 67.49504089355469, 'l1_l

  6%|▌         | 29421/488281 [06:44<1:15:39, 101.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29449/488281 [06:44<1:05:43, 116.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29477/488281 [06:45<1:00:50, 125.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29505/488281 [06:45<58:28, 130.75it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.67120361328125, 'l2_loss': 66.53398132324219, 'l1_loss': 60.13722610473633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 29533/488281 [06:45<57:18, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29561/488281 [06:45<56:59, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29589/488281 [06:46<1:35:53, 79.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29617/488281 [06:46<1:15:39, 101.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.85086059570312, 'l2_loss': 67.43297576904297, 'l1_loss': 60.41788101196289}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 29645/488281 [06:46<1:05:42, 116.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29673/488281 [06:46<1:00:49, 125.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29701/488281 [06:47<58:27, 130.74it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.13422393798828, 'l2_loss': 66.58688354492188, 'l1_loss': 59.547340393066406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  6%|▌         | 29729/488281 [06:47<57:16, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29757/488281 [06:47<56:42, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29785/488281 [06:48<1:35:40, 79.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29813/488281 [06:48<1:15:32, 101.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.16751861572266, 'l2_loss': 67.23866271972656, 'l1_loss': 59.928855895996094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  6%|▌         | 29841/488281 [06:48<1:05:38, 116.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29869/488281 [06:48<1:00:46, 125.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29897/488281 [06:48<58:23, 130.82it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 29925/488281 [06:49<57:15, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.23918151855469, 'l2_loss': 67.44552612304688, 'l1_loss': 59.79365539550781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 29939/488281 [06:49<56:55, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29967/488281 [06:49<1:35:44, 79.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 29995/488281 [06:49<1:15:31, 101.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 31



  0%|          | 0/50 [00:00<?, ?it/s][A

acts.shape=torch.Size([65536, 4096])



  4%|▍         | 2/50 [00:00<00:02, 17.98it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



  8%|▊         | 4/50 [00:00<00:03, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])



 12%|█▏        | 6/50 [00:00<00:03, 11.61it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 16%|█▌        | 8/50 [00:00<00:03, 11.16it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 20%|██        | 10/50 [00:00<00:03, 10.89it/s][A

acts.shape=torch.Size([65536, 4096])



 24%|██▍       | 12/50 [00:01<00:03, 10.77it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 28%|██▊       | 14/50 [00:01<00:03, 10.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 32%|███▏      | 16/50 [00:01<00:03, 10.63it/s][A

acts.shape=torch.Size([65536, 4096])



 36%|███▌      | 18/50 [00:01<00:03, 10.60it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 40%|████      | 20/50 [00:01<00:02, 10.58it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 44%|████▍     | 22/50 [00:02<00:02, 10.56it/s][A

acts.shape=torch.Size([65536, 4096])



 48%|████▊     | 24/50 [00:02<00:02, 10.55it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 52%|█████▏    | 26/50 [00:02<00:02, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 56%|█████▌    | 28/50 [00:02<00:02, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])



 60%|██████    | 30/50 [00:02<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 64%|██████▍   | 32/50 [00:02<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 68%|██████▊   | 34/50 [00:03<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])



 72%|███████▏  | 36/50 [00:03<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 76%|███████▌  | 38/50 [00:03<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 40/50 [00:03<00:00, 10.51it/s][A

acts.shape=torch.Size([65536, 4096])



 84%|████████▍ | 42/50 [00:03<00:00, 10.52it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 88%|████████▊ | 44/50 [00:04<00:00, 10.52it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 92%|█████████▏| 46/50 [00:04<00:00, 10.52it/s][A

acts.shape=torch.Size([65536, 4096])



 96%|█████████▌| 48/50 [00:04<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 50/50 [00:04<00:00, 10.70it/s][A


acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
Resetting neurons! tensor(2, device='cuda:0')
torch.Size([4096, 512]) torch.Size([512, 4096]) torch.Size([4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.0891876220703, 'l2_loss': 67.54952239990234, 'l1_loss': 60.53966522216797}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8943679332733154 4.297476768493652 8.929861068725586
91.99%
Reconstruction: (0.9199465028792794, 3.8943679332733154, 4.297476768493652, 8.929861068725586)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.27it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  6%|▌         | 30022/488281 [06:59<19:05:26,  6.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30050/488281 [06:59<9:45:45, 13.04it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30078/488281 [06:59<5:14:40, 24.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30106/488281 [07:00<3:02:38, 41.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.10493469238281, 'l2_loss': 66.90008544921875, 'l1_loss': 60.2048454284668}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  6%|▌         | 30134/488281 [07:00<1:58:04, 64.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30162/488281 [07:00<2:05:38, 60.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 30190/488281 [07:00<1:30:10, 84.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30218/488281 [07:01<1:12:50, 104.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.82605743408203, 'l2_loss': 67.43463134765625, 'l1_loss': 60.39142608642578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 30246/488281 [07:01<1:04:19, 118.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30274/488281 [07:01<1:00:08, 126.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30302/488281 [07:01<58:07, 131.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.3495101928711, 'l2_loss': 67.58562469482422, 'l1_loss': 59.763885498046875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 30330/488281 [07:02<57:07, 133.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 30358/488281 [07:02<1:35:48, 79.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▌         | 30386/488281 [07:02<1:15:32, 101.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30414/488281 [07:02<1:05:38, 116.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.84551239013672, 'l2_loss': 67.56800079345703, 'l1_loss': 60.27751159667969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▌         | 30442/488281 [07:03<1:00:44, 125.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30470/488281 [07:03<58:33, 130.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▌         | 30498/488281 [07:03<57:17, 133.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.25396728515625,

  6%|▋         | 30526/488281 [07:03<56:40, 134.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 30540/488281 [07:04<1:52:38, 67.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 30568/488281 [07:04<1:23:46, 91.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30596/488281 [07:04<1:09:37, 109.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.86154174804688, 'l2_loss': 67.65208435058594, 'l1_loss': 60.20945358276367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▋         | 30624/488281 [07:04<1:02:42, 121.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30652/488281 [07:05<59:17, 128.63it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30680/488281 [07:05<57:37, 132.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30708/488281 [07:05<56:49, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.67224884033203, 'l2_loss': 67.09898376464844, 'l1_loss': 59.573265075683594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 30736/488281 [07:06<1:35:33, 79.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 30764/488281 [07:06<1:15:23, 101.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30792/488281 [07:06<1:05:30, 116.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.65943908691406,

  6%|▋         | 30820/488281 [07:06<1:00:40, 125.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30848/488281 [07:06<58:16, 130.82it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30876/488281 [07:07<57:07, 133.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30904/488281 [07:07<56:34, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 128.13230895996094, 'l2_loss': 67.66725158691406, 'l1_loss': 60.46505355834961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 30932/488281 [07:07<1:07:19, 113.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 30960/488281 [07:07<1:01:32, 123.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 30988/488281 [07:07<58:42, 129.82it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.37it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


  6%|▋         | 31002/488281 [07:12<13:14:46,  9.59it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31016/488281 [07:12<9:33:07, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31030/488281 [07:12<6:57:58, 18.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31044/488281 [07:12<5:09:22, 24.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31058/488281 [07:13<3:53:21, 32.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31072/488281 [07:13<3:00:08, 42.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31086/488281 [07:13<2:22:53, 53.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31100/488281 [07:13<1:56:48, 65.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.3358154296875, 'l2_loss': 67.1773452758789, 'l1_loss': 60.158470153808594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31128/488281 [07:13<1:34:12, 80.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31156/488281 [07:13<1:14:41, 102.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31184/488281 [07:13<1:05:07, 116.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31212/488281 [07:14<1:00:28, 125.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.48585510253906, 'l2_loss': 67.72763061523438, 'l1_loss': 59.75822067260742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▋         | 31240/488281 [07:14<58:10, 130.94it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31268/488281 [07:14<57:02, 133.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31282/488281 [07:14<56:43, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31310/488281 [07:15<1:04:56, 117.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.77024841308594, 'l2_loss': 66.29350280761719, 'l1_loss': 59.476741790771484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  6%|▋         | 31338/488281 [07:15<1:00:23, 126.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31366/488281 [07:15<58:07, 131.03it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31394/488281 [07:15<56:59, 133.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.57068634033203, 'l2_loss': 66.93434143066406, 'l1_loss': 59.63634490966797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▋         | 31422/488281 [07:15<56:28, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31450/488281 [07:16<56:11, 135.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31478/488281 [07:16<56:03, 135.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31506/488281 [07:16<1:14:30, 102.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.68525695800781, 'l2_loss': 66.96365356445312, 'l1_loss': 59.72160339355469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▋         | 31534/488281 [07:16<1:05:01, 117.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31562/488281 [07:17<1:00:22, 126.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31590/488281 [07:17<58:04, 131.05it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31618/488281 [07:17<56:59, 133.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.74828338623047, 'l2_loss': 67.15728759765625, 'l1_loss': 59.59099578857422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▋         | 31646/488281 [07:17<56:25, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  6%|▋         | 31674/488281 [07:17<56:09, 135.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  6%|▋         | 31702/488281 [07:18<1:04:55, 117.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.39800262451172, 'l2_loss': 66.81681060791016, 'l1_loss': 59.58119201660156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  6%|▋         | 31730/488281 [07:18<1:00:18, 126.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 31758/488281 [07:18<58:02, 131.08it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 31786/488281 [07:18<56:56, 133.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 31814/488281 [07:18<56:25, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.09892272949219, 'l2_loss': 67.10655212402344, 'l1_loss': 59.99237060546875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 31842/488281 [07:19<56:08, 135.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 31870/488281 [07:19<56:00, 135.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 31884/488281 [07:19<1:08:02, 111.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 31912/488281 [07:19<1:01:51, 122.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.10482025146484, 'l2_loss': 66.84921264648438, 'l1_loss': 59.25560760498047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 31940/488281 [07:19<58:47, 129.36it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 31968/488281 [07:20<57:17, 132.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 31996/488281 [07:20<56:32, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 32
acts.shape=torch.Size([4096, 4096])
{'loss': 126.4632568359375, 'l2_loss': 67.15718078613281, 'l1_loss': 59.30607604980469}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.908145


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.02it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.82it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  7%|▋         | 32024/488281 [07:25<9:31:50, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32052/488281 [07:25<5:08:39, 24.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32065/488281 [07:25<4:08:50, 30.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32093/488281 [07:25<2:29:22, 50.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.8076171875, 'l2_loss': 66.75204467773438, 'l1_loss': 60.05557632446289}
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  7%|▋         | 32121/488281 [07:25<1:41:25, 74.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32149/488281 [07:26<1:18:05, 97.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32177/488281 [07:26<1:06:42, 113.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32205/488281 [07:26<1:01:10, 124.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.30718994140625, 'l2_loss': 66.79354858398438, 'l1_loss': 59.513641357421875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 32233/488281 [07:26<58:26, 130.07it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32247/488281 [07:26<57:38, 131.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32261/488281 [07:26<1:09:06, 109.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32275/488281 [07:27<1:05:07, 116.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32289/488281 [07:27<1:02:19, 121.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32303/488281 [07:27<1:00:23, 125.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 127.00324249267578, 'l2_loss': 67.16338348388672, 'l1_loss': 59.83985900878906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 32317/488281 [07:27<59:00, 128.79it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32331/488281 [07:27<58:02, 130.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32345/488281 [07:27<57:21, 132.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32359/488281 [07:27<56:53, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32373/488281 [07:27<56:33, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32387/488281 [07:27<56:19, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32401/488281 [07:27<56:12, 135.19it/s]

{'loss': 125.92174530029297, 'l2_loss': 66.64248657226562, 'l1_loss': 59.279258728027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32415/488281 [07:28<56:04, 135.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32429/488281 [07:28<55:59, 135.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32443/488281 [07:28<55:56, 135.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32471/488281 [07:28<1:04:19, 118.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32499/488281 [07:28<59:56, 126.72it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.46097564697266, 'l2_loss': 66.08049011230469, 'l1_loss': 59.38048553466797}
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 32527/488281 [07:28<57:59, 130.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32555/488281 [07:29<56:50, 133.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32583/488281 [07:29<56:16, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32611/488281 [07:29<56:02, 135.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.74966430664062, 'l2_loss': 66.74568176269531, 'l1_loss': 60.00398635864258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 32639/488281 [07:29<55:55, 135.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32653/488281 [07:29<1:08:15, 111.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32681/488281 [07:30<1:01:55, 122.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32709/488281 [07:30<58:48, 129.11it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.12828063964844, 'l2_loss': 66.87319946289062, 'l1_loss': 59.25508499145508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 32737/488281 [07:30<57:14, 132.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32765/488281 [07:30<56:28, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32793/488281 [07:31<56:05, 135.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.99198913574219, 'l2_loss': 67.4066390991211, 'l1_loss': 59.58534622192383}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  7%|▋         | 32821/488281 [07:31<55:56, 135.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32849/488281 [07:31<1:04:18, 118.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 32877/488281 [07:31<59:55, 126.66it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32905/488281 [07:31<57:48, 131.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.97894287109375, 'l2_loss': 67.03265380859375, 'l1_loss': 59.946292877197266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 32933/488281 [07:32<56:44, 133.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32961/488281 [07:32<56:12, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 32989/488281 [07:32<55:56, 135.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.00it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.64it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  7%|▋         | 33017/488281 [07:37<9:29:30, 13.32it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33042/488281 [07:37<5:28:07, 23.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33070/488281 [07:37<3:06:03, 40.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33098/488281 [07:37<1:58:50, 63.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.20654296875, 'l2_loss': 66.57568359375, 'l1_loss': 59.630863189697266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch

  7%|▋         | 33126/488281 [07:38<1:26:29, 87.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33154/488281 [07:38<1:10:44, 107.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33182/488281 [07:38<1:03:03, 120.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33210/488281 [07:38<59:20, 127.80it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.6103744506836, 'l2_loss': 67.04574584960938, 'l1_loss': 59.56462860107422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33238/488281 [07:39<1:06:02, 114.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33266/488281 [07:39<1:00:45, 124.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33294/488281 [07:39<58:09, 130.38it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33322/488281 [07:39<56:55, 133.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.55741882324219, 'l2_loss': 66.63163757324219, 'l1_loss': 58.925777435302734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 33350/488281 [07:39<56:16, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33378/488281 [07:40<55:58, 135.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33406/488281 [07:40<55:52, 135.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.37200164794922, 'l2_loss': 67.14318084716797, 'l1_loss': 59.22882080078125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33420/488281 [07:40<1:07:50, 111.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33448/488281 [07:40<1:01:38, 122.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33476/488281 [07:40<58:35, 129.36it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33504/488281 [07:41<57:07, 132.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.49230194091797, 'l2_loss': 66.32487487792969, 'l1_loss': 59.16742706298828}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 33532/488281 [07:41<56:34, 133.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33560/488281 [07:41<56:06, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33588/488281 [07:41<55:52, 135.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33616/488281 [07:41<1:04:22, 117.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.93585205078125, 'l2_loss': 66.69499969482422, 'l1_loss': 59.240848541259766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 33644/488281 [07:42<59:55, 126.44it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33672/488281 [07:42<57:43, 131.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33700/488281 [07:42<56:39, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.57540893554688, 'l2_loss': 66.25468444824219, 'l1_loss': 59.32072448730469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 33728/488281 [07:42<56:09, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33756/488281 [07:42<55:52, 135.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33784/488281 [07:43<55:44, 135.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33812/488281 [07:43<1:04:08, 118.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.68486785888672, 'l2_loss': 66.68345642089844, 'l1_loss': 59.00141143798828}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 33840/488281 [07:43<59:47, 126.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33868/488281 [07:43<57:38, 131.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33896/488281 [07:44<56:36, 133.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.3281478881836, 

  7%|▋         | 33924/488281 [07:44<56:06, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33952/488281 [07:44<55:50, 135.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 33980/488281 [07:44<55:43, 135.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 33994/488281 [07:44<1:07:41, 111.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 33
acts.shape=torch.Size([4096, 4096])
{'loss': 125.28257751464844, 'l2_loss': 66.4253921508789, 'l1_loss': 58.857181549072266}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8381266593933105 4.249104022979736 8.8594


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.23it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  7%|▋         | 34020/488281 [07:49<9:51:17, 12.80it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34048/488281 [07:49<5:14:14, 24.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34076/488281 [07:50<3:01:22, 41.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34104/488281 [07:50<1:57:01, 64.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.7396240234375, 'l2_loss': 67.0093765258789, 'l1_loss': 59.730247497558594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  7%|▋         | 34132/488281 [07:50<1:25:38, 88.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34160/488281 [07:50<1:10:17, 107.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34174/488281 [07:50<1:05:53, 114.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34202/488281 [07:51<1:09:14, 109.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.86493682861328, 'l2_loss': 66.22441101074219, 'l1_loss': 58.640525817871094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 34230/488281 [07:51<1:02:16, 121.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34258/488281 [07:51<58:50, 128.59it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34286/488281 [07:51<57:09, 132.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34314/488281 [07:51<56:22, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 126.63421630859375, 'l2_loss': 67.05744934082031, 'l1_loss': 59.57676696777344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 34342/488281 [07:52<55:57, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34370/488281 [07:52<1:07:55, 111.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

  7%|▋         | 34398/488281 [07:52<1:01:38, 122.72it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34426/488281 [07:52<58:33, 129.16it/s]  

{'loss': 126.30029296875, 'l2_loss': 66.7004165649414, 'l1_loss': 59.599876403808594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  7%|▋         | 34454/488281 [07:52<57:01, 132.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34482/488281 [07:53<56:15, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34510/488281 [07:53<55:54, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.99298858642578, 'l2_loss': 67.16816711425781, 'l1_loss': 59.82482147216797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 34538/488281 [07:53<55:43, 135.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34552/488281 [07:53<55:40, 135.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34580/488281 [07:53<1:04:17, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34608/488281 [07:54<59:51, 126.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.70126342773438, 'l2_loss': 66.62957763671875, 'l1_loss': 59.07168960571289}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 34636/488281 [07:54<57:38, 131.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34664/488281 [07:54<56:33, 133.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34692/488281 [07:54<56:00, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34720/488281 [07:54<55:46, 135.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.59941101074219, 'l2_loss': 67.21533966064453, 'l1_loss': 59.38407516479492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 34748/488281 [07:55<55:39, 135.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34776/488281 [07:55<1:04:50, 116.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34804/488281 [07:55<1:00:06, 125.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.96025085449219, 'l2_loss': 66.82972717285156, 'l1_loss': 59.13052749633789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 34832/488281 [07:55<57:45, 130.86it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34860/488281 [07:56<56:36, 133.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34888/488281 [07:56<56:01, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 34916/488281 [07:56<55:46, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.89450073242188, 'l2_loss': 66.7319107055664, 'l1_loss': 59.162593841552734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 34930/488281 [07:56<55:41, 135.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34958/488281 [07:56<1:04:52, 116.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 34986/488281 [07:57<1:00:04, 125.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35000/488281 [07:57<58:42, 128.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.62736511230469, 'l2_loss': 66.56163787841797, 'l1_loss': 59.06572723388672}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.896369218826294 4.309628963470459 8.981417655944824
91.87%
Reconstruction: (0.9187304212035509, 3.896369218826294, 4.309628963470459, 8.981417655944824)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 15.88it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 11.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


  7%|▋         | 35028/488281 [08:01<9:31:43, 13.21it/s] 

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 409

  7%|▋         | 35042/488281 [08:02<6:56:49, 18.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35070/488281 [08:02<3:52:50, 32.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35098/488281 [08:02<2:22:22, 53.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.77891540527344, 'l2_loss': 67.00137329101562, 'l1_loss': 58.77754211425781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 35126/488281 [08:02<1:38:04, 77.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35154/488281 [08:02<1:24:50, 89.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35182/488281 [08:03<1:09:50, 108.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35210/488281 [08:03<1:02:31, 120.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.47279357910156, 'l2_loss': 66.33426666259766, 'l1_loss': 59.13852310180664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 35238/488281 [08:03<58:54, 128.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35266/488281 [08:03<57:07, 132.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35294/488281 [08:03<56:15, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.14874267578125,

  7%|▋         | 35322/488281 [08:04<55:51, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35350/488281 [08:04<1:12:04, 104.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35378/488281 [08:04<1:03:34, 118.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35406/488281 [08:04<59:26, 127.00it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.78681945800781, 'l2_loss': 66.52749633789062, 'l1_loss': 59.25932693481445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 35434/488281 [08:05<57:22, 131.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35462/488281 [08:05<56:21, 133.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35490/488281 [08:05<55:52, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35518/488281 [08:05<55:39, 135.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.34223937988281, 'l2_loss': 67.28765869140625, 'l1_loss': 59.05458068847656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35532/488281 [08:05<1:07:29, 111.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35560/488281 [08:06<1:01:19, 123.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35588/488281 [08:06<58:36, 128.75it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.35271453857422,

  7%|▋         | 35616/488281 [08:06<56:58, 132.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35644/488281 [08:06<56:09, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35672/488281 [08:06<55:45, 135.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35700/488281 [08:07<55:34, 135.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.98556518554688, 'l2_loss': 66.84782409667969, 'l1_loss': 59.13774108886719}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35728/488281 [08:07<1:04:01, 117.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35756/488281 [08:07<59:36, 126.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35784/488281 [08:07<57:26, 131.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35812/488281 [08:08<56:24, 133.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.39811706542969, 'l2_loss': 66.28713989257812, 'l1_loss': 59.11097717285156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 35840/488281 [08:08<55:52, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35868/488281 [08:08<55:36, 135.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35896/488281 [08:08<55:28, 135.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.50286865234375, 'l2_loss': 66.64423370361328, 'l1_loss': 58.8586311340332}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35924/488281 [08:08<1:04:05, 117.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 35952/488281 [08:09<59:37, 126.42it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35980/488281 [08:09<57:26, 131.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 35994/488281 [08:09<56:48, 132.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 34
acts.shape=torch.Size([4096, 4096])
{'loss': 126.4386978149414, 'l2_loss': 67.23402404785156, 'l1_loss': 59.204673767089844}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8420326709747314 4.264944076538086 8.907845497131348
91.65%
Reconstruction: (0.916516574915735, 3.8420326709747314, 4.264944076538086, 8.907845497131


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.24it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  7%|▋         | 36022/488281 [08:14<9:27:20, 13.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36050/488281 [08:14<5:06:13, 24.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36078/488281 [08:14<2:58:36, 42.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36092/488281 [08:14<2:21:37, 53.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.11856842041016, 'l2_loss': 66.37240600585938, 'l1_loss': 58.74616241455078}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 36120/488281 [08:15<1:46:24, 70.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36148/488281 [08:15<1:20:20, 93.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36176/488281 [08:15<1:07:35, 111.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36204/488281 [08:15<1:01:21, 122.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.69824981689453, 'l2_loss': 66.73258972167969, 'l1_loss': 58.965660095214844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 36232/488281 [08:15<58:16, 129.30it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36260/488281 [08:16<56:45, 132.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36274/488281 [08:16<56:19, 133.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 36302/488281 [08:16<1:10:02, 107.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.55859375, 'l2_loss': 66.70657348632812, 'l1_loss': 58.85201644897461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.

  7%|▋         | 36330/488281 [08:16<1:02:30, 120.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36358/488281 [08:16<58:50, 128.02it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36386/488281 [08:17<57:02, 132.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36414/488281 [08:17<56:09, 134.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.91908264160156, 'l2_loss': 66.78417205810547, 'l1_loss': 59.13490676879883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  7%|▋         | 36442/488281 [08:17<55:43, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36470/488281 [08:17<55:30, 135.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  7%|▋         | 36498/488281 [08:18<1:05:27, 115.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.56438446044922, 'l2_loss': 66.94367980957031, 'l1_loss': 58.620704650878906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  7%|▋         | 36526/488281 [08:18<1:00:17, 124.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36554/488281 [08:18<57:43, 130.43it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36582/488281 [08:18<56:27, 133.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  7%|▋         | 36610/488281 [08:18<56:14, 133.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.03357696533203, 'l2_loss': 66.56219482421875, 'l1_loss': 58.47138214111328}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 36638/488281 [08:19<55:46, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 36666/488281 [08:19<55:30, 135.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 36694/488281 [08:19<1:05:38, 114.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 36722/488281 [08:19<1:00:22, 124.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.06039428710938, 'l2_loss': 66.47840881347656, 'l1_loss': 58.58198547363281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 36750/488281 [08:19<57:45, 130.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 36778/488281 [08:20<56:28, 133.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 36806/488281 [08:20<55:52, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.86796569824219, 'l2_loss': 66.78482055664062, 'l1_loss': 59.08314514160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 36834/488281 [08:20<55:33, 135.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 36862/488281 [08:20<55:24, 135.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 36876/488281 [08:20<1:08:57, 109.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 36904/488281 [08:21<1:01:59, 121.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.72187805175781, 'l2_loss': 66.58544921875, 'l1_loss': 59.13642501831055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  8%|▊         | 36932/488281 [08:21<58:31, 128.53it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 36960/488281 [08:21<56:50, 132.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 36988/488281 [08:21<55:59, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.25930786132812,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 13.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.19it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.14it/s]
  8%|▊         | 37002/488281 [08:26<13:12:50,  9.49it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37030/488281 [08:26<6:56:43, 18.05it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37044/488281 [08:26<5:08:19, 24.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37071/488281 [08:27<3:14:22, 38.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37099/488281 [08:27<2:03:15, 61.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.83350372314453, 'l2_loss': 66.90965270996094, 'l1_loss': 58.923851013183594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 37127/488281 [08:27<1:28:32, 84.93it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37155/488281 [08:27<1:11:32, 105.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37183/488281 [08:27<1:03:15, 118.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37211/488281 [08:28<59:13, 126.93it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.70636749267578, 'l2_loss': 66.67549896240234, 'l1_loss': 59.03086853027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 37239/488281 [08:28<57:13, 131.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37267/488281 [08:28<1:06:24, 113.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37295/488281 [08:28<1:00:42, 123.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.91695404052734,

  8%|▊         | 37323/488281 [08:29<57:57, 129.66it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37351/488281 [08:29<56:34, 132.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37379/488281 [08:29<55:54, 134.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37407/488281 [08:29<55:37, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.68679809570312, 'l2_loss': 66.2858657836914, 'l1_loss': 58.400936126708984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 37435/488281 [08:29<55:27, 135.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37463/488281 [08:30<1:05:15, 115.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37491/488281 [08:30<1:00:08, 124.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37519/488281 [08:30<57:39, 130.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.90556335449219, 'l2_loss': 66.48605346679688, 'l1_loss': 58.41950988769531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 37547/488281 [08:30<56:25, 133.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37575/488281 [08:30<55:47, 134.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37603/488281 [08:31<55:33, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.19805145263672, 'l2_loss': 66.77192687988281, 'l1_loss': 58.426124572753906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 37631/488281 [08:31<55:32, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37645/488281 [08:31<1:08:15, 110.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37673/488281 [08:31<1:01:36, 121.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37701/488281 [08:31<58:22, 128.64it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.94902038574219, 'l2_loss': 66.67748260498047, 'l1_loss': 58.27153396606445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 37729/488281 [08:32<56:44, 132.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37757/488281 [08:32<55:56, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37785/488281 [08:32<55:34, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37813/488281 [08:32<55:24, 135.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.97416687011719, 'l2_loss': 66.98688507080078, 'l1_loss': 58.98728561401367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37841/488281 [08:33<1:05:21, 114.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 37869/488281 [08:33<1:00:10, 124.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37897/488281 [08:33<57:37, 130.27it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.63966369628906, 'l2_loss': 66.27615356445312, 'l1_loss': 58.36350631713867}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 37925/488281 [08:33<56:23, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37953/488281 [08:33<55:47, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37981/488281 [08:34<55:28, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 37995/488281 [08:34<55:24, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 35
acts.shape=torch.Size([4096, 4096])
{'loss': 125.07879638671875, 'l2_loss': 66.43119812011719, 'l1_loss': 58.6475944519043}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.846102476119995 4.271727561950684 8.941385269165039
91.65%
Reconstruction: (0.9164668374419457, 3.846102476119995, 4.271727561950684, 8.941385269165039)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.32it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 12.14it/s][A

acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 12.27it/s]

acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  8%|▊         | 38009/488281 [08:38<13:12:55,  9.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38033/488281 [08:39<7:32:28, 16.58it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38061/488281 [08:39<4:03:49, 30.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38089/488281 [08:39<2:26:17, 51.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38117/488281 [08:39<1:40:00, 75.02it/s]

{'loss': 125.95853424072266, 'l2_loss': 67.07600402832031, 'l1_loss': 58.882530212402344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 38145/488281 [08:40<1:17:08, 97.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38173/488281 [08:40<1:06:04, 113.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38201/488281 [08:40<1:00:40, 123.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.43241882324219, 'l2_loss': 66.896484375, 'l1_loss': 58.53593063354492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38229/488281 [08:40<1:07:13, 111.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38257/488281 [08:40<1:01:10, 122.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38285/488281 [08:41<58:12, 128.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38313/488281 [08:41<56:47, 132.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.1559066772461, 'l2_loss': 66.45756530761719, 'l1_loss': 58.698341369628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 38341/488281 [08:41<56:03, 133.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38369/488281 [08:41<55:43, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38397/488281 [08:41<55:33, 134.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38425/488281 [08:42<1:04:25, 116.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.63999938964844, 'l2_loss': 66.85872650146484, 'l1_loss': 58.781272888183594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 38453/488281 [08:42<59:47, 125.38it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38481/488281 [08:42<57:31, 130.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38509/488281 [08:42<56:26, 132.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.43177032470703, 'l2_loss': 66.45974731445312, 'l1_loss': 58.972023010253906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 38537/488281 [08:43<55:51, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38565/488281 [08:43<55:36, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38579/488281 [08:43<55:31, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38607/488281 [08:43<1:04:27, 116.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.59732055664062, 'l2_loss': 66.77474212646484, 'l1_loss': 58.82258224487305}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 38635/488281 [08:43<59:46, 125.37it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38663/488281 [08:44<57:37, 130.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38691/488281 [08:44<56:26, 132.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.60105895996094,

  8%|▊         | 38719/488281 [08:44<55:52, 134.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38747/488281 [08:44<55:34, 134.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38775/488281 [08:44<55:27, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38803/488281 [08:45<1:05:22, 114.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.26848602294922, 'l2_loss': 66.68966674804688, 'l1_loss': 58.578819274902344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 38831/488281 [08:45<1:00:13, 124.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38859/488281 [08:45<57:40, 129.86it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38887/488281 [08:45<56:27, 132.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38915/488281 [08:46<55:53, 134.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.13906860351562, 'l2_loss': 66.8619155883789, 'l1_loss': 58.27714920043945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  8%|▊         | 38943/488281 [08:46<55:35, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 38971/488281 [08:46<55:25, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 38999/488281 [08:46<1:04:16, 116.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.48808288574219, 'l2_loss': 66.25918579101562, 'l1_l


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 11.44it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.52it/s]
  8%|▊         | 39012/488281 [08:51<13:33:50,  9.20it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39040/488281 [08:51<7:02:50, 17.71it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39068/488281 [08:51<3:54:23, 31.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39096/488281 [08:52<2:22:48, 52.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39124/488281 [08:52<1:38:06, 76.30it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 125.09054565429688, 'l2_loss': 66.6943359375, 'l1_loss': 58.39621353149414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  8%|▊         | 39152/488281 [08:52<1:16:22, 98.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39166/488281 [08:52<1:10:02, 106.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39194/488281 [08:52<1:12:08, 103.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39222/488281 [08:53<1:03:32, 117.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.09526824951172, 'l2_loss': 66.40382385253906, 'l1_loss': 58.691444396972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 39250/488281 [08:53<59:18, 126.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39278/488281 [08:53<57:12, 130.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39306/488281 [08:53<56:13, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.20781707763672, 'l2_loss': 67.23202514648438, 'l1_loss': 58.975791931152344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 39334/488281 [08:53<55:44, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39348/488281 [08:53<55:35, 134.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39376/488281 [08:54<1:04:43, 115.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39404/488281 [08:54<59:55, 124.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.86360168457031, 'l2_loss': 67.05093383789062, 'l1_loss': 58.81266784667969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 39432/488281 [08:54<57:30, 130.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39460/488281 [08:54<56:21, 132.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39488/488281 [08:55<55:45, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39516/488281 [08:55<55:29, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.39349365234375, 'l2_loss': 66.65740966796875, 'l1_loss': 58.736080169677734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 39544/488281 [08:55<55:21, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39572/488281 [08:55<1:05:39, 113.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39600/488281 [08:56<1:00:19, 123.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.67864990234375, 'l2_loss': 66.7252197265625, 'l1_loss': 58.953433990478516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 39628/488281 [08:56<57:43, 129.52it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39656/488281 [08:56<56:27, 132.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39684/488281 [08:56<55:55, 133.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39712/488281 [08:56<55:34, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.55328369140625, 'l2_loss': 66.80095672607422, 'l1_loss': 58.75232696533203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 39740/488281 [08:57<55:21, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39768/488281 [08:57<1:04:39, 115.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39796/488281 [08:57<59:50, 124.89it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39824/488281 [08:57<57:28, 130.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.58193969726562, 'l2_loss': 66.90196990966797, 'l1_loss': 58.67997360229492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 39852/488281 [08:57<56:17, 132.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39880/488281 [08:58<55:43, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39908/488281 [08:58<55:29, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.68058776855469, 'l2_loss': 66.76377868652344, 'l1_loss': 58.916805267333984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 39922/488281 [08:58<55:23, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39950/488281 [08:58<1:04:10, 116.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 39978/488281 [08:58<59:35, 125.37it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 39992/488281 [08:59<58:14, 128.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 36
acts.shape=torch.Size([4096, 4096])
{'loss': 125.56373596191406, 'l2_loss': 66.93403625488281, 'l1_loss': 58.629695892333984}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9043376445770264 4.32365608215332 8.95392894744873
91.70%
Reconstruction: (0.9169599255809421, 3.9043376445770264, 4.32365608215332, 8.95392894744873)



  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:00<00:00,  9.74it/s][A

acts.shape=torch.Size([65536, 4096])



 40%|████      | 2/5 [00:00<00:00,  9.64it/s][A

acts.shape=torch.Size([65536, 4096])



 60%|██████    | 3/5 [00:00<00:00,  9.77it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00,  9.81it/s][A
  8%|▊         | 40006/488281 [09:03<13:28:05,  9.25it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40034/488281 [09:04<7:04:05, 17.62it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40062/488281 [09:04<3:55:55, 31.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40090/488281 [09:04<2:23:42, 51.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40118/488281 [09:04<1:38:32, 75.80it/s]

{'loss': 125.396240234375, 'l2_loss': 66.8720932006836, 'l1_loss': 58.52414321899414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

  8%|▊         | 40146/488281 [09:05<1:25:57, 86.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

  8%|▊         | 40174/488281 [09:05<1:10:24, 106.07it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40202/488281 [09:05<1:02:39, 119.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.86431884765625, 'l2_loss': 66.98248291015625, 'l1_loss': 58.881832122802734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 40230/488281 [09:05<58:50, 126.92it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40258/488281 [09:05<56:57, 131.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40286/488281 [09:06<56:01, 133.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40314/488281 [09:06<55:35, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.76106262207031, 'l2_loss': 67.27925109863281, 'l1_loss': 58.481815338134766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  8%|▊         | 40342/488281 [09:06<1:04:27, 115.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40370/488281 [09:06<59:40, 125.09it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40398/488281 [09:06<57:20, 130.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.1273193359375, 

  8%|▊         | 40426/488281 [09:07<56:13, 132.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40454/488281 [09:07<55:39, 134.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40482/488281 [09:07<55:21, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40510/488281 [09:07<55:16, 135.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.41742706298828, 'l2_loss': 66.77349853515625, 'l1_loss': 58.64392852783203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40524/488281 [09:07<1:09:16, 107.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40552/488281 [09:08<1:02:03, 120.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40580/488281 [09:08<58:28, 127.61it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40608/488281 [09:08<56:45, 131.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.54743957519531, 'l2_loss': 66.89328002929688, 'l1_loss': 58.65415954589844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 40636/488281 [09:08<55:55, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40664/488281 [09:08<55:28, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40692/488281 [09:09<55:37, 134.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.45428466796875, 'l2_loss': 66.8053970336914, 'l1_loss': 58.64889144897461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40720/488281 [09:09<1:04:43, 115.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40748/488281 [09:09<59:47, 124.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40776/488281 [09:09<57:23, 129.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40804/488281 [09:10<56:16, 132.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78068542480469, 'l2_loss': 66.56074523925781, 'l1_loss': 58.21994400024414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  8%|▊         | 40832/488281 [09:10<55:39, 133.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40860/488281 [09:10<55:23, 134.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40888/488281 [09:10<55:11, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 40916/488281 [09:11<1:03:58, 116.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.70540618896484, 'l2_loss': 67.068115234375, 'l1_loss': 58.637290954589844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  8%|▊         | 40944/488281 [09:11<59:21, 125.59it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 40972/488281 [09:11<57:05, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 41000/488281 [09:11<55:57, 133.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.36896514892578, 'l2_loss': 66.85574340820312, 'l1_loss': 58.513221740722656}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
ac


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.42it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  8%|▊         | 41028/488281 [09:16<9:19:31, 13.32it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 41056/488281 [09:16<5:02:09, 24.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 41084/488281 [09:16<2:56:03, 42.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  8%|▊         | 41097/488281 [09:16<2:34:46, 48.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.67578125, 'l2_loss': 66.33546447753906, 'l1_loss': 58.34031295776367}
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41111/488281 [09:17<2:04:24, 59.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41125/488281 [09:17<1:43:19, 72.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41139/488281 [09:17<1:28:41, 84.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41153/488281 [09:17<1:18:29, 94.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41167/488281 [09:17<1:11:22, 104.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41181/488281 [09:17<1:06:38, 111.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41195/488281 [09:17<1:03:06, 118.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41209/488281 [09:17<1:00:40, 122.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.47257232666016, 'l2_loss': 67.00188446044922, 'l1_loss': 58.47068786621094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41223/488281 [09:17<58:55, 126.43it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41237/488281 [09:17<57:42, 129.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41251/488281 [09:18<56:52, 131.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41265/488281 [09:18<56:16, 132.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41279/488281 [09:18<55:51, 133.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41293/488281 [09:18<1:08:38, 108.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.56072998046875, 'l2_loss': 66.73431396484375, 'l1_loss': 58.826416015625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41307/488281 [09:18<1:04:33, 115.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41321/488281 [09:18<1:01:38, 120.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41335/488281 [09:18<59:36, 124.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41349/488281 [09:18<58:11, 128.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41363/488281 [09:18<57:11, 130.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41377/488281 [09:19<56:29, 131.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41391/488281 [09:19<55:59, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41405/488281 [09:19<55:40, 133.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.36897277832031, 'l2_loss': 66.95977783203125, 'l1_loss': 58.40919494628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41419/488281 [09:19<55:25, 134.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41433/488281 [09:19<55:16, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41447/488281 [09:19<55:09, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41461/488281 [09:19<55:03, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  8%|▊         | 41489/488281 [09:19<1:04:02, 116.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41517/488281 [09:20<59:23, 125.38it/s]  

acts.shape=torch.Size([4096, 4096])
{'loss': 126.49598693847656, 'l2_loss': 67.32821655273438, 'l1_loss': 59.16777420043945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▊         | 41545/488281 [09:20<57:04, 130.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41573/488281 [09:20<55:55, 133.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41601/488281 [09:20<55:24, 134.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.979736328125, 'l2_loss': 67.05735778808594, 'l1_loss': 58.92238235473633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  9%|▊         | 41629/488281 [09:21<55:06, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41657/488281 [09:21<54:58, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 41685/488281 [09:21<1:04:35, 115.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 41713/488281 [09:21<1:00:01, 123.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.66215515136719, 'l2_loss': 66.9551010131836, 'l1_loss': 58.70705795288086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  9%|▊         | 41741/488281 [09:21<57:21, 129.74it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41769/488281 [09:22<56:03, 132.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41797/488281 [09:22<55:25, 134.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41825/488281 [09:22<55:08, 134.94it/s]

{'loss': 125.93592834472656, 'l2_loss': 67.31553649902344, 'l1_loss': 58.62039566040039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▊         | 41853/488281 [09:22<54:58, 135.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 41881/488281 [09:23<1:04:01, 116.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 41909/488281 [09:23<59:21, 125.33it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.95307922363281, 'l2_loss': 66.99241638183594, 'l1_loss': 58.96066665649414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▊         | 41937/488281 [09:23<57:01, 130.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41965/488281 [09:23<55:52, 133.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 41993/488281 [09:23<55:19, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.40it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 12.78it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 12.36it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])



  9%|▊         | 42007/488281 [09:28<13:10:55,  9.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42021/488281 [09:28<9:30:05, 13.05it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42035/488281 [09:28<6:55:30, 17.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42048/488281 [09:28<5:26:01, 22.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42062/488281 [09:29<4:03:26, 30.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42076/488281 [09:29<3:06:14, 39.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42090/488281 [09:29<2:26:31, 50.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.32796478271484, 'l2_loss': 66.69029998779297, 'l1_loss': 58.637664794921875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42104/488281 [09:29<1:58:55, 62.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42118/488281 [09:29<1:39:37, 74.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42132/488281 [09:29<1:26:09, 86.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42146/488281 [09:29<1:16:45, 96.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42160/488281 [09:29<1:10:09, 105.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42174/488281 [09:29<1:05:33, 113.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42188/488281 [09:30<1:02:20, 119.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.13763427734375, 'l2_loss': 66.68858337402344, 'l1_loss': 58.44905090332031}


  9%|▊         | 42202/488281 [09:30<1:00:29, 122.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42216/488281 [09:30<58:50, 126.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42230/488281 [09:30<57:39, 128.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42258/488281 [09:30<1:13:56, 100.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42286/488281 [09:30<1:04:11, 115.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42314/488281 [09:31<59:27, 124.99it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.666015625, 'l2_loss': 66.56771087646484, 'l1_loss': 58.09830093383789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch

  9%|▊         | 42342/488281 [09:31<57:04, 130.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42370/488281 [09:31<55:54, 132.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42398/488281 [09:31<55:22, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.1772689819336, 'l2_loss': 66.66726684570312, 'l1_loss': 58.51000213623047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  9%|▊         | 42426/488281 [09:31<55:06, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42454/488281 [09:32<1:04:23, 115.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42482/488281 [09:32<59:29, 124.88it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42510/488281 [09:32<57:07, 130.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.50018310546875, 'l2_loss': 67.05203247070312, 'l1_loss': 58.448150634765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▊         | 42538/488281 [09:32<55:55, 132.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42566/488281 [09:33<55:21, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42594/488281 [09:33<55:04, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42622/488281 [09:33<54:56, 135.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.21420288085938, 'l2_loss': 66.7845458984375, 'l1_loss': 58.42965316772461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42636/488281 [09:33<1:08:31, 108.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▊         | 42664/488281 [09:33<1:01:30, 120.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▊         | 42692/488281 [09:34<58:04, 127.88it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.19615173339844, 'l2_loss': 66.83860778808594, 'l1_loss': 58.357547760009766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▊         | 42720/488281 [09:34<56:36, 131.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 42748/488281 [09:34<55:39, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 42776/488281 [09:34<55:12, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 42804/488281 [09:34<55:01, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.27937316894531, 'l2_loss': 67.16706848144531, 'l1_loss': 58.1123046875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 42832/488281 [09:35<1:04:21, 115.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 42860/488281 [09:35<59:27, 124.84it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 42888/488281 [09:35<57:03, 130.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 42916/488281 [09:35<55:52, 132.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.99862670898438, 'l2_loss': 66.72402954101562, 'l1_loss': 58.274600982666016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▉         | 42944/488281 [09:36<55:17, 134.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 42972/488281 [09:36<54:59, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43000/488281 [09:36<54:53, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.63880920410156, 'l2_loss': 67.2816390991211, 'l1_loss': 58.3571662902832}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.907884120941162 4.333537578


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.23it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  9%|▉         | 43014/488281 [09:41<13:04:46,  9.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43042/488281 [09:41<6:52:27, 17.99it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43070/488281 [09:41<3:50:01, 32.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43098/488281 [09:41<2:20:36, 52.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.08648681640625, 'l2_loss': 67.49990844726562, 'l1_loss': 58.58658218383789}
acts.shape=

  9%|▉         | 43126/488281 [09:41<1:36:50, 76.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43154/488281 [09:42<1:15:20, 98.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43182/488281 [09:42<1:04:50, 114.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43196/488281 [09:42<1:01:48, 120.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.63710021972656, 'l2_loss': 66.65931701660156, 'l1_loss': 57.977779388427734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43224/488281 [09:42<1:07:34, 109.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43252/488281 [09:42<1:01:00, 121.56it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43280/488281 [09:43<57:49, 128.27it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43308/488281 [09:43<56:15, 131.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.19657135009766, 'l2_loss': 66.74312591552734, 'l1_loss': 58.45344543457031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 43336/488281 [09:43<55:26, 133.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43364/488281 [09:43<55:03, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43378/488281 [09:43<54:57, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43406/488281 [09:44<1:03:58, 115.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.11901092529297, 'l2_loss': 67.17679595947266, 'l1_loss': 58.94221496582031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 43434/488281 [09:44<59:15, 125.12it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43462/488281 [09:44<56:54, 130.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43490/488281 [09:44<55:46, 132.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.19129943847656,

  9%|▉         | 43518/488281 [09:45<55:14, 134.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43546/488281 [09:45<54:56, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43574/488281 [09:45<54:49, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43602/488281 [09:45<1:03:43, 116.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.90129852294922, 'l2_loss': 66.77565002441406, 'l1_loss': 58.125648498535156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▉         | 43630/488281 [09:45<59:05, 125.42it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43658/488281 [09:46<56:49, 130.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43686/488281 [09:46<55:42, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43714/488281 [09:46<55:12, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.57544708251953, 'l2_loss': 67.30909729003906, 'l1_loss': 58.26634979248047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 43742/488281 [09:46<55:09, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43770/488281 [09:46<54:56, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43798/488281 [09:47<1:03:19, 116.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.90035247802734, 'l2_loss': 66.5463638305664, 'l1_lo

  9%|▉         | 43826/488281 [09:47<58:55, 125.71it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43854/488281 [09:47<56:44, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43882/488281 [09:47<55:39, 133.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43910/488281 [09:48<55:09, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56494140625, 'l2_loss': 66.5854263305664, 'l1_loss': 57.97951126098633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

  9%|▉         | 43938/488281 [09:48<54:54, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 43966/488281 [09:48<54:46, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43980/488281 [09:48<1:06:37, 111.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 43994/488281 [09:48<1:03:01, 117.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 38
acts.shape=torch.Size([4096, 4096])
{'loss': 124.72772216796875, 'l2_loss': 66.39590454101562, 'l1_loss': 58.33181381225586}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9065098762512207 4.322759628295898 8.925034523010254
91.71%
Reconstruction: (0.9170573462634098, 3.9065098762512207, 4.322759628295898, 8.925034523010254)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.88it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.78it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  9%|▉         | 44021/488281 [09:53<9:28:55, 13.01it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44049/488281 [09:53<5:04:40, 24.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44077/488281 [09:53<2:56:41, 41.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44105/488281 [09:54<1:54:21, 64.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.71804809570312, 'l2_loss': 66.98823547363281, 'l1_loss': 58.72981262207031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 44133/488281 [09:54<1:23:53, 88.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44147/488281 [09:54<1:15:06, 98.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44175/488281 [09:54<1:12:49, 101.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44203/488281 [09:54<1:03:32, 116.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.8963623046875, 'l2_loss': 67.48057556152344, 'l1_loss': 58.41578674316406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  9%|▉         | 44231/488281 [09:55<58:57, 125.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44259/488281 [09:55<56:57, 129.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44287/488281 [09:55<55:44, 132.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44315/488281 [09:55<55:10, 134.11it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 125.71949768066406, 'l2_loss': 67.15779113769531, 'l1_loss': 58.561710357666016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▉         | 44343/488281 [09:55<54:53, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44371/488281 [09:56<1:03:18, 116.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44399/488281 [09:56<58:51, 125.67it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.00425720214844, 'l2_loss': 66.69153594970703, 'l1_loss': 58.31271743774414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 44427/488281 [09:56<56:44, 130.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44455/488281 [09:56<55:36, 133.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44483/488281 [09:57<55:06, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44511/488281 [09:57<54:51, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.64346313476562, 'l2_loss': 66.5489730834961, 'l1_loss': 58.094486236572266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 44539/488281 [09:57<54:42, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44567/488281 [09:57<1:02:56, 117.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44595/488281 [09:57<58:38, 126.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44623/488281 [09:58<56:34, 130.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.87346649169922, 'l2_loss': 66.6007308959961, 'l1_loss': 58.272735595703125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 44651/488281 [09:58<55:33, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44679/488281 [09:58<55:01, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44707/488281 [09:58<54:49, 134.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.40469360351562, 'l2_loss': 66.99148559570312, 'l1_loss': 58.4132080078125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  9%|▉         | 44735/488281 [09:58<54:42, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44749/488281 [09:59<1:06:30, 111.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44777/488281 [09:59<1:00:43, 121.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44805/488281 [09:59<57:36, 128.30it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.07180786132812, 'l2_loss': 67.06932067871094, 'l1_loss': 58.00249099731445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 44833/488281 [09:59<56:02, 131.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44861/488281 [09:59<55:16, 133.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44889/488281 [10:00<54:53, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.7484359741211, 

  9%|▉         | 44917/488281 [10:00<54:42, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44945/488281 [10:00<1:03:04, 117.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 44973/488281 [10:00<58:42, 125.85it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 44987/488281 [10:00<57:26, 128.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.43708801269531, 'l2_loss': 67.07693481445312, 'l1_loss': 58.36015701293945}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.890571117401123 4.3131656


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 14.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.46it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.39it/s]
  9%|▉         | 45015/488281 [10:05<9:20:38, 13.18it/s] 

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45043/488281 [10:05<5:02:29, 24.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45071/488281 [10:06<2:56:02, 41.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45099/488281 [10:06<1:54:02, 64.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45113/488281 [10:06<1:36:12, 76.77it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 125.41384887695312, 'l2_loss': 66.9083251953125, 'l1_loss': 58.50551986694336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45141/488281 [10:06<1:30:47, 81.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45169/488281 [10:07<1:12:16, 102.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45197/488281 [10:07<1:03:12, 116.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.218994140625, '

  9%|▉         | 45225/488281 [10:07<58:45, 125.67it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45253/488281 [10:07<56:35, 130.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45281/488281 [10:07<55:53, 132.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45309/488281 [10:08<55:13, 133.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.63189697265625, 'l2_loss': 66.74624633789062, 'l1_loss': 57.885650634765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▉         | 45337/488281 [10:08<1:04:04, 115.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45365/488281 [10:08<59:11, 124.72it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45393/488281 [10:08<56:46, 130.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45421/488281 [10:08<55:35, 132.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34222412109375, 'l2_loss': 66.95113372802734, 'l1_loss': 57.39109420776367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 45449/488281 [10:09<55:01, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45477/488281 [10:09<54:44, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45491/488281 [10:09<54:39, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.67082214355469, 'l2_loss': 67.15496063232422, 'l1_loss': 58.515865325927734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45519/488281 [10:09<1:03:28, 116.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45547/488281 [10:09<58:52, 125.35it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45575/488281 [10:10<56:35, 130.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45603/488281 [10:10<55:32, 132.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.3340835571289, 'l2_loss': 67.25385284423828, 'l1_loss': 59.080230712890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 45631/488281 [10:10<54:56, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45659/488281 [10:10<54:42, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45687/488281 [10:11<54:34, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45715/488281 [10:11<1:04:05, 115.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.18421173095703, 'l2_loss': 67.32515716552734, 'l1_loss': 57.85905456542969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 45743/488281 [10:11<59:09, 124.69it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45771/488281 [10:11<56:41, 130.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45799/488281 [10:11<55:39, 132.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.41658020019531, 'l2_loss': 66.96076965332031, 'l1_loss': 58.455810546875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

  9%|▉         | 45827/488281 [10:12<55:04, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45855/488281 [10:12<54:42, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45883/488281 [10:12<54:34, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 45911/488281 [10:12<1:03:43, 115.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.17373657226562, 'l2_loss': 66.88697814941406, 'l1_loss': 58.28676223754883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

  9%|▉         | 45939/488281 [10:13<58:56, 125.08it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45967/488281 [10:13<56:36, 130.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 45995/488281 [10:13<55:26, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.38it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.88it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



  9%|▉         | 46023/488281 [10:18<9:15:11, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46051/488281 [10:18<4:59:46, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46079/488281 [10:18<2:54:37, 42.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 46092/488281 [10:18<2:32:13, 48.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.02654266357422, 'l2_loss': 66.70936584472656, 'l1_loss': 58.317176818847656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▉         | 46120/488281 [10:18<1:41:49, 72.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46148/488281 [10:19<1:17:27, 95.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46176/488281 [10:19<1:05:38, 112.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46204/488281 [10:19<59:53, 123.01it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.0000228881836, 'l2_loss': 66.31961059570312, 'l1_loss': 57.68041229248047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

  9%|▉         | 46232/488281 [10:19<57:04, 129.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46260/488281 [10:20<55:43, 132.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 46288/488281 [10:20<1:03:40, 115.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


  9%|▉         | 46316/488281 [10:20<58:56, 124.98it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.51225280761719, 'l2_loss': 66.75144958496094, 'l1_loss': 58.760799407958984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

  9%|▉         | 46344/488281 [10:20<56:34, 130.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

  9%|▉         | 46372/488281 [10:20<55:25, 132.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46400/488281 [10:21<54:51, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.47769165039062, 'l2_loss': 67.05038452148438, 'l1_loss': 58.42730712890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 46428/488281 [10:21<54:35, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46456/488281 [10:21<54:28, 135.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 46484/488281 [10:21<1:02:58, 116.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 46512/488281 [10:22<58:35, 125.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.77596282958984, 'l2_loss': 66.4788818359375, 'l1_loss': 58.297080993652344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 46540/488281 [10:22<56:23, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46568/488281 [10:22<55:19, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46596/488281 [10:22<54:46, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.05084228515625,

 10%|▉         | 46624/488281 [10:22<54:31, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46652/488281 [10:23<54:25, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 46680/488281 [10:23<1:02:38, 117.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 46708/488281 [10:23<58:25, 125.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.7110824584961, 'l2_loss': 66.55766296386719, 'l1_loss': 58.153419494628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 46736/488281 [10:23<56:17, 130.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46764/488281 [10:23<55:16, 133.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46792/488281 [10:24<54:45, 134.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46820/488281 [10:24<54:40, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.5333251953125, 'l2_loss': 66.61656188964844, 'l1_loss': 57.91676712036133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|▉         | 46834/488281 [10:24<54:35, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 46862/488281 [10:24<1:02:57, 116.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 46890/488281 [10:24<58:29, 125.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.64476013183594,

 10%|▉         | 46918/488281 [10:25<56:22, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46946/488281 [10:25<55:17, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46974/488281 [10:25<54:46, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 46988/488281 [10:25<54:36, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.38320922851562, 'l2_loss': 66.94393920898438, 'l1_loss': 58.43927001953125}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.843888521194458 4.283840656280518 8.967206001281738
91.41%
Reconstruction: (0.9141274893082432, 3.843888521194458, 4.283840656280518, 8.967206001281738)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 14.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.49it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.43it/s]
 10%|▉         | 47002/488281 [10:30<12:52:00,  9.53it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47030/488281 [10:30<6:45:56, 18.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47056/488281 [10:30<4:07:31, 29.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47084/488281 [10:31<2:27:27, 49.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47112/488281 [10:31<1:39:33, 73.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.28732299804688, 'l2_loss': 66.6576156616211, 'l1_loss': 58.62971115112305}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|▉         | 47140/488281 [10:31<1:16:22, 96.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47168/488281 [10:31<1:05:03, 112.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47196/488281 [10:31<59:31, 123.49it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.67984008789062, 'l2_loss': 66.76284790039062, 'l1_loss': 57.916988372802734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|▉         | 47224/488281 [10:32<56:51, 129.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47252/488281 [10:32<1:04:35, 113.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47280/488281 [10:32<59:19, 123.91it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47308/488281 [10:32<57:01, 128.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.52682495117188, 'l2_loss': 66.58686065673828, 'l1_loss': 57.939964294433594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|▉         | 47336/488281 [10:33<55:35, 132.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47364/488281 [10:33<54:53, 133.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47392/488281 [10:33<54:32, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47420/488281 [10:33<54:24, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.27547454833984, 'l2_loss': 66.74095916748047, 'l1_loss': 58.534515380859375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47448/488281 [10:33<1:02:40, 117.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47476/488281 [10:34<58:20, 125.93it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47504/488281 [10:34<56:16, 130.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.43795776367188, 'l2_loss': 66.98822784423828, 'l1_loss': 58.44973373413086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 47532/488281 [10:34<55:12, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47560/488281 [10:34<54:42, 134.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47588/488281 [10:34<54:28, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47602/488281 [10:35<54:23, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.27796173095703, 'l2_loss': 66.99243927001953, 'l1_loss': 58.2855224609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47630/488281 [10:35<1:02:27, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47658/488281 [10:35<58:12, 126.17it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47686/488281 [10:35<56:08, 130.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47714/488281 [10:35<55:09, 133.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.08790588378906, 'l2_loss': 66.76728057861328, 'l1_loss': 58.32062530517578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 47742/488281 [10:36<54:38, 134.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47770/488281 [10:36<54:24, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47798/488281 [10:36<54:18, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.79695129394531, 'l2_loss': 66.94612121582031, 'l1_loss': 57.850826263427734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47826/488281 [10:36<1:02:28, 117.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 47854/488281 [10:37<58:26, 125.59it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47882/488281 [10:37<56:14, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47910/488281 [10:37<55:13, 132.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.00880432128906, 'l2_loss': 67.29507446289062, 'l1_loss': 57.71372985839844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 47938/488281 [10:37<54:38, 134.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47966/488281 [10:37<54:22, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 47994/488281 [10:38<54:15, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 40
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.33it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 10%|▉         | 48022/488281 [10:42<9:20:01, 13.10it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48050/488281 [10:43<5:01:59, 24.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48078/488281 [10:43<2:55:34, 41.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48106/488281 [10:43<1:53:39, 64.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.90227508544922, 'l2_loss': 67.14552307128906, 'l1_loss': 57.756752014160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|▉         | 48134/488281 [10:43<1:23:16, 88.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48162/488281 [10:43<1:08:24, 107.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48190/488281 [10:44<1:01:07, 120.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 48204/488281 [10:44<1:10:36, 103.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1409912109375, 'l2_loss': 66.43624114990234, 'l1_loss': 57.704750061035156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 48232/488281 [10:44<1:02:12, 117.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48260/488281 [10:44<58:02, 126.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48288/488281 [10:44<56:01, 130.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.68236541748047,

 10%|▉         | 48316/488281 [10:45<55:02, 133.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 48344/488281 [10:45<54:44, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48372/488281 [10:45<54:22, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 48400/488281 [10:45<1:02:34, 117.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.57478332519531, 'l2_loss': 66.71407318115234, 'l1_loss': 57.860713958740234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|▉         | 48428/488281 [10:46<58:16, 125.80it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48456/488281 [10:46<56:07, 130.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48484/488281 [10:46<55:04, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48512/488281 [10:46<54:34, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.95901489257812, 'l2_loss': 66.84283447265625, 'l1_loss': 58.116180419921875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|▉         | 48540/488281 [10:46<54:18, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48568/488281 [10:47<54:11, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 48596/488281 [10:47<1:02:18, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.46122741699219, 'l2_loss': 66.7571792602539, 'l1_lo

 10%|▉         | 48624/488281 [10:47<58:08, 126.04it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48652/488281 [10:47<56:03, 130.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48680/488281 [10:48<55:02, 133.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48708/488281 [10:48<54:32, 134.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.94954681396484, 'l2_loss': 66.46737670898438, 'l1_loss': 57.48217010498047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|▉         | 48736/488281 [10:48<54:15, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|▉         | 48764/488281 [10:48<54:09, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 48792/488281 [10:48<1:02:16, 117.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|▉         | 48820/488281 [10:49<58:05, 126.08it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.60995483398438, 'l2_loss': 66.7322006225586, 'l1_loss': 57.87775421142578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|█         | 48848/488281 [10:49<56:15, 130.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 48876/488281 [10:49<55:08, 132.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 48904/488281 [10:49<54:34, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.3327407836914, 'l2_loss': 67.05278015136719, 'l1_loss': 58.27996063232422}
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|█         | 48932/488281 [10:49<54:17, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 48946/488281 [10:50<54:13, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 48974/488281 [10:50<1:02:32, 117.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 48988/488281 [10:50<59:58, 122.09it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47297668457031, 'l2_loss': 66.434326171875, 'l1_loss': 58.03865051269531}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8201510906219482 4.253928184509277 8.906793594360352
91.47%
Reconstruction: (0.9147223156397316, 3.8201510906219482, 4.253928184509277, 8.906793594360352)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.32it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 10%|█         | 49001/488281 [10:55<13:00:18,  9.38it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49015/488281 [10:55<9:19:03, 13.10it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49029/488281 [10:55<6:45:56, 18.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49043/488281 [10:55<4:59:35, 24.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49057/488281 [10:55<3:45:31, 32.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49071/488281 [10:55<2:53:53, 42.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49085/488281 [10:55<2:17:49, 53.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49099/488281 [10:55<1:52:38, 64.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.55572509765625, 'l2_loss': 67.12460327148438, 'l1_loss': 58.43112564086914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|█         | 49113/488281 [10:55<1:35:03, 77.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49127/488281 [10:55<1:22:43, 88.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49141/488281 [10:56<1:14:06, 98.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49155/488281 [10:56<1:19:38, 91.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49183/488281 [10:56<1:06:33, 109.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49211/488281 [10:56<1:00:09, 121.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.43733215332031, 'l2_loss': 66.58501434326172, 'l1_loss': 57.85232162475586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|█         | 49239/488281 [10:56<57:00, 128.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49267/488281 [10:57<55:27, 131.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49295/488281 [10:57<54:40, 133.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49323/488281 [10:57<54:21, 134.58it/s]

{'loss': 123.78135681152344, 'l2_loss': 66.7122802734375, 'l1_loss': 57.06907653808594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|█         | 49337/488281 [10:57<54:15, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49365/488281 [10:57<1:02:38, 116.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49393/488281 [10:58<58:12, 125.67it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.31234741210938,

 10%|█         | 49421/488281 [10:58<56:03, 130.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49449/488281 [10:58<54:58, 133.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49477/488281 [10:58<54:27, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49505/488281 [10:58<54:12, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74520111083984, 'l2_loss': 67.04435729980469, 'l1_loss': 57.700843811035156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|█         | 49533/488281 [10:59<54:04, 135.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49561/488281 [10:59<1:02:22, 117.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49589/488281 [10:59<58:03, 125.92it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49617/488281 [10:59<55:58, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.878173828125, 'l2_loss': 66.77812194824219, 'l1_loss': 58.10004806518555}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 10%|█         | 49645/488281 [11:00<54:55, 133.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49673/488281 [11:00<54:25, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49701/488281 [11:00<54:12, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.91252136230469,

 10%|█         | 49715/488281 [11:00<54:07, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49743/488281 [11:00<1:02:18, 117.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49771/488281 [11:01<57:59, 126.02it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49799/488281 [11:01<55:52, 130.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.80537414550781, 'l2_loss': 66.8145751953125, 'l1_loss': 57.99080276489258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|█         | 49827/488281 [11:01<54:53, 133.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49855/488281 [11:01<54:23, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49883/488281 [11:01<54:41, 133.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49911/488281 [11:02<54:18, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.40126037597656, 'l2_loss': 66.88452911376953, 'l1_loss': 57.5167350769043}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|█         | 49939/488281 [11:02<1:02:24, 117.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 49967/488281 [11:02<58:02, 125.86it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 49995/488281 [11:02<55:55, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 41
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.27it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 10%|█         | 50023/488281 [11:07<9:10:05, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50051/488281 [11:07<4:57:00, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50079/488281 [11:07<2:52:58, 42.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50107/488281 [11:08<1:52:16, 65.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74217224121094, 'l2_loss': 66.67587280273438, 'l1_loss': 58.0662956237793}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50135/488281 [11:08<1:30:36, 80.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50163/488281 [11:08<1:11:52, 101.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50191/488281 [11:08<1:02:42, 116.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50219/488281 [11:08<58:10, 125.49it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.633544921875, 'l2_loss': 67.01293182373047, 'l1_loss': 57.620609283447266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 10%|█         | 50247/488281 [11:09<55:58, 130.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50275/488281 [11:09<54:53, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50303/488281 [11:09<54:22, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.0848388671875, 'l2_loss': 67.13927459716797, 'l1_loss': 57.9455680847168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50317/488281 [11:09<1:06:01, 110.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50345/488281 [11:09<59:48, 122.03it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50373/488281 [11:10<57:03, 127.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50401/488281 [11:10<55:25, 131.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.09915161132812, 'l2_loss': 66.986328125, 'l1_loss': 58.112823486328125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 10%|█         | 50429/488281 [11:10<54:36, 133.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50457/488281 [11:10<54:10, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50485/488281 [11:11<53:58, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50513/488281 [11:11<1:02:16, 117.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.84209442138672, 'l2_loss': 66.99259948730469, 'l1_loss': 57.84949493408203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|█         | 50541/488281 [11:11<57:57, 125.87it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50569/488281 [11:11<55:50, 130.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50597/488281 [11:11<54:48, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.53359985351562, 'l2_loss': 66.58516693115234, 'l1_loss': 57.948429107666016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|█         | 50625/488281 [11:12<54:18, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50653/488281 [11:12<54:02, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50681/488281 [11:12<53:55, 135.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50709/488281 [11:12<1:02:04, 117.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50289916992188, 'l2_loss': 66.56664276123047, 'l1_loss': 57.93625259399414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|█         | 50737/488281 [11:13<57:52, 126.01it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50765/488281 [11:13<55:46, 130.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50793/488281 [11:13<54:44, 133.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50821/488281 [11:13<54:17, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.47401428222656, 'l2_loss': 67.1502685546875, 'l1_loss': 58.3237419128418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 10%|█         | 50849/488281 [11:13<54:00, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50877/488281 [11:14<53:55, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 50905/488281 [11:14<1:02:11, 117.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13133239746094, 'l2_loss': 66.74532318115234, 'l1_loss': 57.386009216308594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|█         | 50933/488281 [11:14<57:53, 125.91it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50961/488281 [11:14<55:45, 130.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 50989/488281 [11:14<54:43, 133.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.33it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A
 10%|█         | 51003/488281 [11:19<12:37:50,  9.62it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 51017/488281 [11:19<9:06:35, 13.33it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 51045/488281 [11:19<4:55:15, 24.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 51072/488281 [11:20<3:06:29, 39.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 10%|█         | 51100/488281 [11:20<1:58:05, 61.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20860290527344, 'l2_loss': 66.58233642578125, 'l1_loss': 57.62627029418945}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 10%|█         | 51128/488281 [11:20<1:25:08, 85.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 51156/488281 [11:20<1:09:03, 105.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 51184/488281 [11:20<1:01:13, 118.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 51212/488281 [11:21<57:26, 126.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.29615783691406, 'l2_loss': 67.39939880371094, 'l1_loss': 57.896759033203125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 10%|█         | 51240/488281 [11:21<55:31, 131.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 10%|█         | 51254/488281 [11:21<54:59, 132.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51282/488281 [11:21<1:02:31, 116.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51310/488281 [11:22<58:04, 125.40it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.504638671875, 'l2_loss': 67.36246490478516, 'l1_loss': 58.14216995239258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 11%|█         | 51338/488281 [11:22<55:51, 130.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51366/488281 [11:22<54:46, 132.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51394/488281 [11:22<54:36, 133.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51422/488281 [11:22<54:10, 134.39it/s]

{'loss': 124.67555236816406, 'l2_loss': 66.85287475585938, 'l1_loss': 57.82267761230469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 51450/488281 [11:23<53:56, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51478/488281 [11:23<1:03:38, 114.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51506/488281 [11:23<58:36, 124.20it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.279541015625, 'l2_loss': 67.05123901367188, 'l1_loss': 58.22829818725586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 11%|█         | 51534/488281 [11:23<56:07, 129.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51562/488281 [11:23<54:52, 132.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51590/488281 [11:24<54:17, 134.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51618/488281 [11:24<53:59, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.28407287597656, 'l2_loss': 66.96244812011719, 'l1_loss': 58.32162857055664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 51646/488281 [11:24<53:51, 135.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51660/488281 [11:24<1:05:25, 111.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51688/488281 [11:24<59:25, 122.47it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.64710998535156,

 11%|█         | 51716/488281 [11:25<56:30, 128.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51744/488281 [11:25<55:02, 132.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51772/488281 [11:25<54:20, 133.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51800/488281 [11:25<53:58, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78950500488281, 'l2_loss': 66.69107055664062, 'l1_loss': 58.09843444824219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 51828/488281 [11:25<53:50, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51856/488281 [11:26<1:02:11, 116.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 51884/488281 [11:26<57:50, 125.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51912/488281 [11:26<55:55, 130.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.64421081542969, 'l2_loss': 66.89939880371094, 'l1_loss': 57.744808197021484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 11%|█         | 51940/488281 [11:26<54:45, 132.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51968/488281 [11:27<54:10, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 51996/488281 [11:27<53:53, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 42
acts.shape=torch.Size([4096, 4096])
{'loss': 125.57089233398438, 'l2_loss': 67.4713134765625, 'l1_loss': 58.099578857421875}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 11%|█         | 52024/488281 [11:32<9:06:50, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52035/488281 [11:32<7:16:00, 16.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52049/488281 [11:32<5:16:23, 22.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52063/488281 [11:32<3:55:12, 30.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52077/488281 [11:32<2:59:30, 40.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52090/488281 [11:32<2:24:12, 50.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52104/488281 [11:32<1:56:20, 62.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.25737762451172, 'l2_loss': 67.34386444091797, 'l1_loss': 57.91351318359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52118/488281 [11:32<1:37:11, 74.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52132/488281 [11:32<1:23:56, 86.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52146/488281 [11:33<1:14:45, 97.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52160/488281 [11:33<1:08:21, 106.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52174/488281 [11:33<1:03:54, 113.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52188/488281 [11:33<1:00:47, 119.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52202/488281 [11:33<58:39, 123.91it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12713623046875, 'l2_loss': 66.54481506347656, 'l1_loss': 57.58232498168945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52216/488281 [11:33<57:07, 127.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52244/488281 [11:33<1:03:44, 114.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52272/488281 [11:34<58:33, 124.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52300/488281 [11:34<56:12, 129.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.5811767578125, 'l2_loss': 66.682373046875, 'l1_loss': 57.8988037109375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 11%|█         | 52328/488281 [11:34<55:12, 131.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52356/488281 [11:34<54:23, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52384/488281 [11:34<53:58, 134.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52412/488281 [11:35<54:05, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.19149780273438, 'l2_loss': 66.64947509765625, 'l1_loss': 57.54201889038086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52440/488281 [11:35<1:02:41, 115.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52468/488281 [11:35<58:01, 125.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52496/488281 [11:35<55:43, 130.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52524/488281 [11:35<54:38, 132.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.65998840332031, 'l2_loss': 66.95829772949219, 'l1_loss': 57.70169448852539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 52552/488281 [11:36<54:04, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52580/488281 [11:36<53:47, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52594/488281 [11:36<53:43, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23619079589844, 'l2_loss': 66.60508728027344, 'l1_loss': 57.631099700927734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52622/488281 [11:36<1:02:35, 116.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52650/488281 [11:36<57:58, 125.24it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52678/488281 [11:37<55:42, 130.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52706/488281 [11:37<54:36, 132.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.64869689941406, 'l2_loss': 66.72581481933594, 'l1_loss': 57.92288589477539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 52734/488281 [11:37<54:04, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52762/488281 [11:37<53:47, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52790/488281 [11:38<53:39, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52818/488281 [11:38<1:01:43, 117.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.6300048828125, 'l2_loss': 66.85830688476562, 'l1_loss': 57.77170181274414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 11%|█         | 52846/488281 [11:38<57:32, 126.13it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52874/488281 [11:38<55:28, 130.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52902/488281 [11:38<54:31, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47954559326172, 'l2_loss': 66.96360778808594, 'l1_loss': 57.51593780517578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 52930/488281 [11:39<54:02, 134.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 52958/488281 [11:39<54:00, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 52986/488281 [11:39<53:45, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53000/488281 [11:39<1:05:49, 110.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.91255950927734, 'l2_loss': 67.09468078613281, 'l1_loss': 57.81787872314453}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8275306224823 4.2622175216674805 8.887593269348145
91.41%
Reconstruction: (0.9140945617630996, 3.8275306224823, 4.2622175216674805, 8.887593269348145)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53026/488281 [11:44<9:24:59, 12.84it/s] 


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53054/488281 [11:44<5:00:14, 24.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53082/488281 [11:44<2:53:30, 41.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53110/488281 [11:45<1:52:06, 64.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.59959411621094, 'l2_loss': 66.87698364257812, 'l1_loss': 57.72261428833008}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 53138/488281 [11:45<1:22:08, 88.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53166/488281 [11:45<1:07:31, 107.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53180/488281 [11:45<1:03:19, 114.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53208/488281 [11:45<1:06:20, 109.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.66212463378906, 'l2_loss': 66.78722381591797, 'l1_loss': 57.87489700317383}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 53236/488281 [11:46<59:46, 121.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53264/488281 [11:46<56:33, 128.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53292/488281 [11:46<54:59, 131.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53320/488281 [11:46<54:15, 133.62it/s]

{'loss': 124.69300842285156, 'l2_loss': 66.95649719238281, 'l1_loss': 57.736515045166016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 11%|█         | 53348/488281 [11:46<53:52, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53362/488281 [11:46<53:45, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53390/488281 [11:47<1:01:42, 117.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.98600769042969, 'l2_loss': 67.64968872070312, 'l1_loss': 58.33632278442383}
acts.shape=

 11%|█         | 53418/488281 [11:47<57:30, 126.03it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53446/488281 [11:47<55:34, 130.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53474/488281 [11:47<54:29, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53502/488281 [11:48<54:00, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.75019073486328, 'l2_loss': 66.68067932128906, 'l1_loss': 58.06951141357422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 53530/488281 [11:48<53:43, 134.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53558/488281 [11:48<53:36, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53586/488281 [11:48<1:01:52, 117.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53614/488281 [11:48<57:38, 125.70it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.75643157958984, 'l2_loss': 66.86033630371094, 'l1_loss': 57.896095275878906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 11%|█         | 53642/488281 [11:49<55:28, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53670/488281 [11:49<54:26, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53698/488281 [11:49<53:57, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.53978729248047, 'l2_loss': 67.56632232666016, 'l1_loss': 57.97346496582031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 53726/488281 [11:49<53:42, 134.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53754/488281 [11:50<53:34, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53782/488281 [11:50<1:01:38, 117.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53810/488281 [11:50<57:27, 126.02it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.46636199951172, 'l2_loss': 66.82286071777344, 'l1_loss': 57.64350128173828}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 53838/488281 [11:50<55:22, 130.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53866/488281 [11:50<54:22, 133.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53894/488281 [11:51<53:52, 134.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 53922/488281 [11:51<53:40, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.28153991699219, 'l2_loss': 67.23551940917969, 'l1_loss': 58.046016693115234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 11%|█         | 53950/488281 [11:51<53:33, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53964/488281 [11:51<1:11:06, 101.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 53992/488281 [11:51<1:02:04, 116.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 43
acts.shape=torch.Size([4096, 4096])
{'loss': 124.45364379882812, 'l2_loss': 66.86894989013672, 'l1_loss': 57.58469772338867}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 11%|█         | 54019/488281 [11:56<9:16:08, 13.01it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54047/488281 [11:56<4:57:50, 24.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54075/488281 [11:57<2:53:12, 41.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54103/488281 [11:57<1:52:03, 64.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.828369140625, 'l2_loss': 67.01570892333984, 'l1_loss': 57.81265640258789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 11%|█         | 54131/488281 [11:57<1:22:04, 88.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54159/488281 [11:57<1:15:29, 95.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54187/488281 [11:58<1:04:11, 112.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54215/488281 [11:58<58:42, 123.22it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.63984680175781, 'l2_loss': 66.74436950683594, 'l1_loss': 57.89547348022461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 54243/488281 [11:58<55:57, 129.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54271/488281 [11:58<54:37, 132.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54299/488281 [11:58<53:58, 133.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54327/488281 [11:59<53:41, 134.70it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 125.48799896240234, 'l2_loss': 67.60171508789062, 'l1_loss': 57.88628387451172}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 54341/488281 [11:59<1:05:07, 111.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54369/488281 [11:59<59:06, 122.35it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54397/488281 [11:59<56:10, 128.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.9691390991211, 

 11%|█         | 54425/488281 [11:59<54:44, 132.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54453/488281 [12:00<54:15, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54481/488281 [12:00<53:47, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54509/488281 [12:00<53:35, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.87538146972656, 'l2_loss': 67.34024047851562, 'l1_loss': 57.5351448059082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 11%|█         | 54523/488281 [12:00<53:30, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 11%|█         | 54551/488281 [12:00<1:01:37, 117.29it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54579/488281 [12:01<57:24, 125.91it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54607/488281 [12:01<55:21, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.48362731933594, 'l2_loss': 67.14815521240234, 'l1_loss': 57.33547592163086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█         | 54635/488281 [12:01<54:18, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54663/488281 [12:01<53:48, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54691/488281 [12:01<53:31, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54719/488281 [12:02<53:26, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.32999420166016, 'l2_loss': 67.13247680664062, 'l1_loss': 58.19751739501953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54733/488281 [12:02<1:04:55, 111.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54761/488281 [12:02<58:59, 122.47it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54789/488281 [12:02<56:05, 128.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.09638214111328,

 11%|█         | 54817/488281 [12:02<54:39, 132.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54845/488281 [12:03<53:56, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54873/488281 [12:03<53:37, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█         | 54901/488281 [12:03<53:28, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.57948303222656, 'l2_loss': 67.59722137451172, 'l1_loss': 57.98225784301758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█         | 54929/488281 [12:03<1:01:24, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 54957/488281 [12:03<57:15, 126.15it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 54985/488281 [12:04<55:33, 129.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 54999/488281 [12:04<54:52, 131.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.81856536865234, 'l2_loss': 67.32164001464844, 'l1_loss': 57.496925354003906}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8356106281280518 4.275225639343262 8.905290603637695
91.33%
Reconstruction: (0.9132854512831421, 3.8356106281280518, 4.275225639343262, 8.905290603637695)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.45it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


 80%|████████  | 4/5 [00:00<00:00, 12.30it/s][A


acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 12.21it/s]
 11%|█▏        | 55013/488281 [12:08<12:35:03,  9.56it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55041/488281 [12:09<6:37:09, 18.18it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55069/488281 [12:09<3:41:46, 32.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55097/488281 [12:09<2:15:48, 53.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55111/488281 [12:09<2:03:20, 58.53it/s]

{'loss': 125.14378356933594, 'l2_loss': 66.95596313476562, 'l1_loss': 58.18782424926758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55139/488281 [12:09<1:27:36, 82.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55167/488281 [12:10<1:10:05, 102.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55195/488281 [12:10<1:01:30, 117.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.37886047363281,

 11%|█▏        | 55223/488281 [12:10<57:19, 125.92it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55251/488281 [12:10<55:11, 130.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55279/488281 [12:10<54:13, 133.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55293/488281 [12:11<53:56, 133.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07026672363281, 'l2_loss': 66.67515563964844, 'l1_loss': 57.395111083984375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55321/488281 [12:11<1:02:13, 115.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55349/488281 [12:11<57:37, 125.22it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55377/488281 [12:11<55:22, 130.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55405/488281 [12:11<54:18, 132.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.98487854003906, 'l2_loss': 67.0946273803711, 'l1_loss': 57.8902473449707}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 11%|█▏        | 55433/488281 [12:12<53:45, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55461/488281 [12:12<53:29, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55475/488281 [12:12<53:42, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55503/488281 [12:12<1:02:09, 116.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.21509552001953, 'l2_loss': 66.64698028564453, 'l1_loss': 57.568115234375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 11%|█▏        | 55531/488281 [12:12<57:33, 125.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55559/488281 [12:13<55:18, 130.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55587/488281 [12:13<54:14, 132.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55615/488281 [12:13<53:43, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.1345443725586, 'l2_loss': 66.63870239257812, 'l1_loss': 57.49584197998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 11%|█▏        | 55643/488281 [12:13<53:26, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55671/488281 [12:14<53:18, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55699/488281 [12:14<1:02:10, 115.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.30471801757812, 'l2_loss': 66.82911682128906, 'l1_loss': 57.4755973815918}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 11%|█▏        | 55727/488281 [12:14<57:34, 125.21it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55755/488281 [12:14<55:19, 130.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55783/488281 [12:14<54:13, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55811/488281 [12:15<53:41, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.6383056640625, 'l2_loss': 67.32310485839844, 'l1_loss': 58.31520462036133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 11%|█▏        | 55839/488281 [12:15<53:24, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55867/488281 [12:15<53:15, 135.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55895/488281 [12:15<1:02:04, 116.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 55923/488281 [12:16<57:30, 125.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.93797302246094, 'l2_loss': 67.38192749023438, 'l1_loss': 57.5560417175293}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 11%|█▏        | 55951/488281 [12:16<55:17, 130.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55979/488281 [12:16<54:11, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 55993/488281 [12:16<53:51, 133.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 44
acts.shape=torch.Size([4096, 4096])
{'loss': 125.48451232910156, 'l2_loss': 67.50460815429688, 'l1_loss': 57.97990417480469}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.87048


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.36it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 11%|█▏        | 56021/488281 [12:21<9:03:09, 13.26it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 56049/488281 [12:21<4:53:13, 24.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 56063/488281 [12:21<3:41:12, 32.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 11%|█▏        | 56090/488281 [12:21<2:32:10, 47.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 11%|█▏        | 56118/488281 [12:22<1:41:18, 71.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.37844848632812, 'l2_loss': 67.31260681152344, 'l1_loss': 58.06584167480469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 11%|█▏        | 56146/488281 [12:22<1:16:38, 93.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56174/488281 [12:22<1:04:37, 111.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56202/488281 [12:22<58:46, 122.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.24058532714844, 'l2_loss': 67.14205169677734, 'l1_loss': 58.098533630371094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 56230/488281 [12:22<55:53, 128.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56244/488281 [12:23<55:03, 130.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56272/488281 [12:23<1:03:08, 114.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56300/488281 [12:23<58:01, 124.08it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.82453155517578, 'l2_loss': 66.62789916992188, 'l1_loss': 57.196632385253906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 56328/488281 [12:23<55:31, 129.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56356/488281 [12:24<54:16, 132.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56384/488281 [12:24<53:41, 134.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56412/488281 [12:24<53:24, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27731323242188, 'l2_loss': 66.77638244628906, 'l1_loss': 57.50092697143555}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 56440/488281 [12:24<53:14, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56468/488281 [12:24<1:01:11, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56496/488281 [12:25<57:21, 125.47it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.4382553100586, 

 12%|█▏        | 56524/488281 [12:25<55:12, 130.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56552/488281 [12:25<54:06, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56580/488281 [12:25<53:34, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56608/488281 [12:25<53:21, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.73985290527344, 'l2_loss': 68.20309448242188, 'l1_loss': 58.53676223754883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 56636/488281 [12:26<53:12, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56664/488281 [12:26<1:01:19, 117.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56692/488281 [12:26<57:06, 125.95it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56720/488281 [12:26<55:03, 130.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.67760467529297, 'l2_loss': 66.89986419677734, 'l1_loss': 57.777740478515625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 56748/488281 [12:27<54:02, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56776/488281 [12:27<53:32, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56804/488281 [12:27<53:18, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91873168945312, 'l2_loss': 66.4425048828125, 'l1_loss': 57.47622299194336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 12%|█▏        | 56818/488281 [12:27<53:14, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56846/488281 [12:27<1:01:10, 117.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 56874/488281 [12:28<57:00, 126.11it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56902/488281 [12:28<55:01, 130.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.75267028808594, 'l2_loss': 66.8564453125, 'l1_loss': 57.89622497558594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 12%|█▏        | 56930/488281 [12:28<54:00, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56958/488281 [12:28<53:30, 134.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 56986/488281 [12:28<53:16, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57000/488281 [12:28<53:12, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.31533813476562, 'l2_loss': 66.85401916503906, 'l1_loss': 57.46131896972656}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.901853561401367 4.33400297164917 8.980191230773926
91.49%
Reconstruction: (0.9149033722483452, 3.901853561401367, 4.33400297164917, 8.980191230773926)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.00it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 12%|█▏        | 57014/488281 [12:33<12:26:53,  9.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57038/488281 [12:33<7:03:10, 16.98it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57066/488281 [12:34<3:48:40, 31.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57094/488281 [12:34<2:17:46, 52.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.8836669921875, 'l2_loss': 67.47254943847656, 'l1_loss': 57.4111213684082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 12%|█▏        | 57122/488281 [12:34<1:34:15, 76.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57150/488281 [12:34<1:13:09, 98.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57178/488281 [12:34<1:02:50, 114.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57206/488281 [12:35<57:52, 124.14it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9918212890625, 'l2_loss': 66.44027709960938, 'l1_loss': 57.55154037475586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57234/488281 [12:35<1:04:45, 110.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57262/488281 [12:35<58:43, 122.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57290/488281 [12:35<55:46, 128.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57318/488281 [12:35<54:21, 132.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.64440155029297, 'l2_loss': 67.03643035888672, 'l1_loss': 57.60797119140625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 57346/488281 [12:36<53:38, 133.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57374/488281 [12:36<53:17, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57402/488281 [12:36<53:09, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.86197662353516, 'l2_loss': 67.26566314697266, 'l1_loss': 57.5963134765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 12%|█▏        | 57430/488281 [12:36<1:01:03, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57458/488281 [12:37<56:56, 126.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57486/488281 [12:37<54:55, 130.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57514/488281 [12:37<53:55, 133.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.53388214111328, 'l2_loss': 66.91877746582031, 'l1_loss': 57.61510467529297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 57542/488281 [12:37<53:40, 133.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57570/488281 [12:37<53:19, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57598/488281 [12:38<53:07, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57612/488281 [12:38<1:04:48, 110.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.52732849121094, 'l2_loss': 67.19895935058594, 'l1_loss': 57.328372955322266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 57640/488281 [12:38<58:45, 122.14it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57668/488281 [12:38<55:46, 128.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57696/488281 [12:38<54:18, 132.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.40750122070312, 'l2_loss': 66.97586822509766, 'l1_loss': 57.4316291809082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 12%|█▏        | 57724/488281 [12:39<53:35, 133.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57752/488281 [12:39<53:15, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57780/488281 [12:39<53:06, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57808/488281 [12:39<1:06:52, 107.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.65667724609375, 'l2_loss': 67.01165771484375, 'l1_loss': 57.645023345947266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 57836/488281 [12:40<59:50, 119.90it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57864/488281 [12:40<56:18, 127.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57892/488281 [12:40<54:33, 131.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57920/488281 [12:40<53:45, 133.42it/s]

{'loss': 125.73283386230469, 'l2_loss': 67.70639038085938, 'l1_loss': 58.02644348144531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 57948/488281 [12:40<53:19, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 57976/488281 [12:41<53:04, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 57990/488281 [12:41<1:04:38, 110.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 45
acts.shape=torch.Size([4096, 4096])
{'loss': 124.16645050048828, 'l2_loss': 66.68476867675781, 'l1_loss': 57.48168182373047}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8691577911376953 4.296135425567627 8.8919


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.37it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 12%|█▏        | 58002/488281 [12:45<13:08:26,  9.10it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58016/488281 [12:46<9:21:25, 12.77it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58044/488281 [12:46<4:58:10, 24.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58072/488281 [12:46<2:52:10, 41.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58100/488281 [12:46<1:51:07, 64.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.81328582763672, 'l2_loss': 66.85148620605469, 'l1_loss': 57.96179962158203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 58128/488281 [12:46<1:21:23, 88.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58156/488281 [12:47<1:06:50, 107.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58184/488281 [12:47<1:11:22, 100.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58212/488281 [12:47<1:01:59, 115.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8934555053711, 'l2_loss': 66.74725341796875, 'l1_loss': 57.146202087402344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 58240/488281 [12:47<57:21, 124.96it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58268/488281 [12:47<55:04, 130.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58296/488281 [12:48<53:55, 132.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58324/488281 [12:48<53:24, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35114288330078, 'l2_loss': 66.70703887939453, 'l1_loss': 57.64410400390625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 58352/488281 [12:48<53:08, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58366/488281 [12:48<53:03, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58394/488281 [12:48<1:00:58, 117.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58422/488281 [12:49<56:52, 125.95it/s]  

{'loss': 124.42829895019531, 'l2_loss': 66.79306030273438, 'l1_loss': 57.63523483276367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 58450/488281 [12:49<54:47, 130.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58478/488281 [12:49<53:48, 133.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58506/488281 [12:49<53:19, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.67861938476562, 'l2_loss': 66.640625, 'l1_loss': 57.03799057006836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Si

 12%|█▏        | 58534/488281 [12:50<53:06, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58548/488281 [12:50<53:15, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58576/488281 [12:50<1:01:07, 117.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58604/488281 [12:50<56:56, 125.78it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.59614562988281, 'l2_loss': 66.95120239257812, 'l1_loss': 57.64494705200195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 58632/488281 [12:50<54:49, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58660/488281 [12:51<53:48, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58688/488281 [12:51<53:17, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58716/488281 [12:51<53:04, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.71137237548828, 'l2_loss': 66.93292236328125, 'l1_loss': 57.77845001220703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 58744/488281 [12:51<52:57, 135.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58772/488281 [12:51<1:00:56, 117.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58800/488281 [12:52<56:47, 126.06it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.24786376953125, 'l2_loss': 66.70964813232422, 'l1_loss': 57.538211822509766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 58828/488281 [12:52<54:47, 130.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58856/488281 [12:52<53:45, 133.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58884/488281 [12:52<53:14, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 58912/488281 [12:52<53:01, 134.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.06399536132812, 'l2_loss': 67.35302734375, 'l1_loss': 57.71096420288086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 12%|█▏        | 58940/488281 [12:53<52:55, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58968/488281 [12:53<1:00:49, 117.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 58996/488281 [12:53<56:43, 126.12it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.36it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 12%|█▏        | 59023/488281 [12:58<9:05:20, 13.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59051/488281 [12:58<4:52:25, 24.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59079/488281 [12:58<2:49:45, 42.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59107/488281 [12:58<1:50:02, 65.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.46220397949219, 'l2_loss': 67.12203216552734, 'l1_loss': 57.340171813964844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 12%|█▏        | 59135/488281 [12:59<1:20:49, 88.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59149/488281 [12:59<1:24:00, 85.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59177/488281 [12:59<1:08:04, 105.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59205/488281 [12:59<1:00:17, 118.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.14747619628906, 'l2_loss': 67.28826141357422, 'l1_loss': 57.85921859741211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 59233/488281 [12:59<56:26, 126.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59261/488281 [13:00<54:34, 131.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59289/488281 [13:00<53:39, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.39798736572266,

 12%|█▏        | 59317/488281 [13:00<53:13, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59345/488281 [13:00<1:00:59, 117.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59373/488281 [13:01<56:46, 125.91it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59401/488281 [13:01<54:44, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.0992431640625, 'l2_loss': 67.35192108154297, 'l1_loss': 57.7473258972168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 12%|█▏        | 59429/488281 [13:01<53:42, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59457/488281 [13:01<53:12, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59485/488281 [13:01<52:57, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59513/488281 [13:02<52:52, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.27278137207031, 'l2_loss': 67.454833984375, 'l1_loss': 57.81795120239258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59541/488281 [13:02<1:00:47, 117.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59569/488281 [13:02<56:51, 125.66it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59597/488281 [13:02<54:45, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59625/488281 [13:03<53:45, 132.89it/s]

{'loss': 124.48966979980469, 'l2_loss': 67.02328491210938, 'l1_loss': 57.46638107299805}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 59653/488281 [13:03<53:12, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59681/488281 [13:03<52:57, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59709/488281 [13:03<52:50, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.87106323242188, 'l2_loss': 67.11554718017578, 'l1_loss': 57.75551986694336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 59737/488281 [13:03<1:00:55, 117.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59765/488281 [13:04<56:44, 125.88it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59793/488281 [13:04<54:40, 130.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59821/488281 [13:04<53:39, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.04779052734375, 'l2_loss': 66.76171875, 'l1_loss': 57.286067962646484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch

 12%|█▏        | 59849/488281 [13:04<53:09, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59877/488281 [13:04<52:53, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59891/488281 [13:05<52:51, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.75431823730469, 'l2_loss': 67.25750732421875, 'l1_loss': 57.49681091308594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59919/488281 [13:05<1:00:45, 117.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 59947/488281 [13:05<56:36, 126.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59975/488281 [13:05<54:35, 130.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 59989/488281 [13:05<54:00, 132.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 46



  0%|          | 0/50 [00:00<?, ?it/s][A

acts.shape=torch.Size([65536, 4096])



  4%|▍         | 2/50 [00:00<00:02, 18.64it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



  8%|▊         | 4/50 [00:00<00:03, 12.82it/s][A

acts.shape=torch.Size([65536, 4096])



 12%|█▏        | 6/50 [00:00<00:03, 11.67it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 16%|█▌        | 8/50 [00:00<00:03, 11.18it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 20%|██        | 10/50 [00:00<00:03, 10.94it/s][A

acts.shape=torch.Size([65536, 4096])



 24%|██▍       | 12/50 [00:01<00:03, 10.80it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 28%|██▊       | 14/50 [00:01<00:03, 10.71it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 32%|███▏      | 16/50 [00:01<00:03, 10.65it/s][A

acts.shape=torch.Size([65536, 4096])



 36%|███▌      | 18/50 [00:01<00:03, 10.61it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 40%|████      | 20/50 [00:01<00:02, 10.59it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 44%|████▍     | 22/50 [00:02<00:02, 10.57it/s][A

acts.shape=torch.Size([65536, 4096])



 48%|████▊     | 24/50 [00:02<00:02, 10.56it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 52%|█████▏    | 26/50 [00:02<00:02, 10.55it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 56%|█████▌    | 28/50 [00:02<00:02, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])



 60%|██████    | 30/50 [00:02<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 64%|██████▍   | 32/50 [00:02<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 68%|██████▊   | 34/50 [00:03<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])



 72%|███████▏  | 36/50 [00:03<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 76%|███████▌  | 38/50 [00:03<00:01, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 40/50 [00:03<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])



 84%|████████▍ | 42/50 [00:03<00:00, 10.49it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 88%|████████▊ | 44/50 [00:04<00:00, 10.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 92%|█████████▏| 46/50 [00:04<00:00, 10.23it/s][A
 96%|█████████▌| 48/50 [00:04<00:00, 10.18it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 50/50 [00:04<00:00, 10.62it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
Resetting neurons! tensor(1, device='cuda:0')
torch.Size([4096, 512]) torch.Size([512, 4096]) torch.Size([4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.40713500976562, 'l2_loss': 67.05929565429688, 'l1_loss': 57.347835540771484}





acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.843566417694092 4.279170513153076 8.985315322875977
91.53%
Reconstruction: (0.9152809474962925, 3.843566417694092, 4.279170513153076, 8.985315322875977)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.36it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 12%|█▏        | 60017/488281 [13:15<17:31:25,  6.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60045/488281 [13:15<9:02:00, 13.17it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60073/488281 [13:15<4:52:24, 24.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60101/488281 [13:16<3:01:21, 39.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47798156738281, 'l2_loss': 67.0816650390625, 'l1_loss': 57.39632034301758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60129/488281 [13:16<1:55:43, 61.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60157/488281 [13:16<1:23:33, 85.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60185/488281 [13:16<1:07:46, 105.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60213/488281 [13:16<1:00:04, 118.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.79902648925781, 'l2_loss': 66.58488464355469, 'l1_loss': 57.21414566040039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 60241/488281 [13:17<56:15, 126.81it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60269/488281 [13:17<54:23, 131.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60283/488281 [13:17<53:51, 132.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60311/488281 [13:17<1:01:12, 116.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.85676574707031, 'l2_loss': 67.55419921875, 'l1_loss': 57.30256652832031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 12%|█▏        | 60339/488281 [13:17<56:49, 125.53it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60367/488281 [13:18<54:40, 130.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60395/488281 [13:18<53:37, 133.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.09402465820312, 'l2_loss': 66.76191711425781, 'l1_l

 12%|█▏        | 60423/488281 [13:18<53:07, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60451/488281 [13:18<52:52, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60479/488281 [13:18<52:44, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60493/488281 [13:19<1:04:23, 110.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.51838684082031, 'l2_loss': 66.69474792480469, 'l1_loss': 57.82363510131836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 60521/488281 [13:19<58:23, 122.09it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60549/488281 [13:19<55:24, 128.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60577/488281 [13:19<53:57, 132.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60605/488281 [13:19<53:16, 133.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.51055908203125, 'l2_loss': 67.19279479980469, 'l1_loss': 57.31776809692383}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 60633/488281 [13:20<52:56, 134.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60661/488281 [13:20<52:45, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60689/488281 [13:20<1:00:47, 117.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60717/488281 [13:20<56:35, 125.94it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56082153320312, 'l2_loss': 66.82320404052734, 'l1_loss': 57.73761749267578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 60745/488281 [13:21<54:31, 130.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60773/488281 [13:21<53:29, 133.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60801/488281 [13:21<53:03, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1627197265625, 'l2_loss': 67.03863525390625, 'l1_loss': 57.124080657958984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 60829/488281 [13:21<52:48, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60857/488281 [13:21<52:41, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60885/488281 [13:22<1:00:33, 117.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 12%|█▏        | 60913/488281 [13:22<56:30, 126.05it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.16207885742188, 'l2_loss': 66.94459533691406, 'l1_loss': 57.21748352050781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 12%|█▏        | 60941/488281 [13:22<54:30, 130.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60969/488281 [13:22<53:29, 133.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 12%|█▏        | 60997/488281 [13:22<53:00, 134.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27146911621094,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 17.47it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 12.12it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 12.04it/s]
 12%|█▏        | 61011/488281 [13:27<12:24:04,  9.57it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61039/488281 [13:27<6:31:18, 18.20it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61053/488281 [13:27<4:49:37, 24.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61080/488281 [13:28<3:03:10, 38.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61108/488281 [13:28<1:56:00, 61.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.78197479248047, 'l2_loss': 66.32923889160156, 'l1_loss': 57.452735900878906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 13%|█▎        | 61136/488281 [13:28<1:23:25, 85.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61164/488281 [13:28<1:07:32, 105.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61192/488281 [13:29<59:47, 119.04it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61220/488281 [13:29<56:01, 127.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.88001251220703, 'l2_loss': 67.2563247680664, 'l1_loss': 57.623687744140625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 61234/488281 [13:29<54:55, 129.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61262/488281 [13:29<1:03:12, 112.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61290/488281 [13:29<57:40, 123.40it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.08425903320312,

 13%|█▎        | 61318/488281 [13:30<54:58, 129.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61346/488281 [13:30<53:38, 132.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61374/488281 [13:30<52:57, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61402/488281 [13:30<52:40, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.01592254638672, 'l2_loss': 67.479248046875, 'l1_loss': 57.53667449951172}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 13%|█▎        | 61430/488281 [13:30<52:30, 135.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61458/488281 [13:31<1:01:23, 115.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61486/488281 [13:31<56:46, 125.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61514/488281 [13:31<54:46, 129.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.25994873046875, 'l2_loss': 67.482666015625, 'l1_loss': 57.77728271484375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 13%|█▎        | 61542/488281 [13:31<53:32, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61570/488281 [13:32<52:54, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61598/488281 [13:32<52:36, 135.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.10748291015625, 'l2_loss': 66.81617736816406, 'l1_loss': 57.29130172729492}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 61626/488281 [13:32<52:29, 135.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61654/488281 [13:32<1:01:31, 115.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61682/488281 [13:32<56:49, 125.11it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61710/488281 [13:33<54:32, 130.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.36126708984375, 'l2_loss': 67.41263580322266, 'l1_loss': 57.94863510131836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 61738/488281 [13:33<53:23, 133.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61766/488281 [13:33<52:50, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61794/488281 [13:33<52:33, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61822/488281 [13:33<52:26, 135.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.843017578125, 'l2_loss': 66.9420166015625, 'l1_loss': 57.901004791259766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61836/488281 [13:34<1:05:32, 108.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 61864/488281 [13:34<58:46, 120.92it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61892/488281 [13:34<55:27, 128.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50344848632812, 'l2_loss': 67.00409698486328, 'l1_loss': 57.49934768676758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 61920/488281 [13:34<53:50, 131.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61948/488281 [13:34<53:02, 133.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61976/488281 [13:35<52:38, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 61990/488281 [13:35<52:31, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 47
acts.shape=torch.Size([4096, 4096])
{'loss': 124.75675201416016, 'l2_loss': 67.35211181640625, 'l1_loss': 57.404640197753906}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8819870948791504 4.311391353607178 8.95824909210205
91.54%
Reconstruction: (0.9154093585077082, 3.8819870948791504, 4.311391353607178, 8.95824909210205)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 13%|█▎        | 62016/488281 [13:40<9:28:46, 12.49it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 62044/488281 [13:40<4:59:42, 23.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62072/488281 [13:40<2:52:11, 41.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62100/488281 [13:40<1:50:43, 64.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.81071472167969, 'l2_loss': 66.6663589477539, 'l1_loss': 57.14435958862305}
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 13%|█▎        | 62128/488281 [13:40<1:20:49, 87.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62156/488281 [13:41<1:06:12, 107.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62184/488281 [13:41<59:04, 120.23it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62198/488281 [13:41<57:00, 124.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.19758605957031, 'l2_loss': 67.01372528076172, 'l1_loss': 57.18386459350586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 13%|█▎        | 62226/488281 [13:41<1:02:27, 113.68it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 62254/488281 [13:41<57:13, 124.08it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62282/488281 [13:42<54:39, 129.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62310/488281 [13:42<53:25, 132.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.86597442626953, 'l2_loss': 67.24874877929688, 'l1_loss': 57.617225646972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 13%|█▎        | 62338/488281 [13:42<52:47, 134.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62366/488281 [13:42<52:29, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62394/488281 [13:42<52:20, 135.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 62422/488281 [13:43<1:00:08, 118.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 126.16138458251953, 'l2_loss': 67.78919982910156, 'l1_loss': 58.37218475341797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 62450/488281 [13:43<56:04, 126.55it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62478/488281 [13:43<54:05, 131.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62506/488281 [13:43<53:09, 133.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.338134765625, 'l2_loss': 67.57991027832031, 'l1_loss': 57.75822448730469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 13%|█▎        | 62534/488281 [13:44<52:58, 133.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62562/488281 [13:44<52:34, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62590/488281 [13:44<52:22, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 62604/488281 [13:44<1:04:05, 110.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.99000549316406, 'l2_loss': 66.65538024902344, 'l1_loss': 57.33462905883789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 62632/488281 [13:44<58:00, 122.28it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62660/488281 [13:45<55:01, 128.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62688/488281 [13:45<53:33, 132.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.06571197509766,

 13%|█▎        | 62716/488281 [13:45<52:51, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62744/488281 [13:45<52:29, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62772/488281 [13:45<52:19, 135.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 62800/488281 [13:46<1:00:14, 117.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78094482421875, 'l2_loss': 67.48844909667969, 'l1_loss': 57.29249954223633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 62828/488281 [13:46<56:07, 126.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62856/488281 [13:46<54:05, 131.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62884/488281 [13:46<53:05, 133.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62912/488281 [13:46<52:37, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.30073547363281, 'l2_loss': 67.56326293945312, 'l1_loss': 57.73747634887695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 62940/488281 [13:47<52:22, 135.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 62968/488281 [13:47<52:15, 135.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 62996/488281 [13:47<1:04:35, 109.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.15257263183594, 'l2_loss': 68.03125762939453, 'l1_l


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.42it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 13%|█▎        | 63023/488281 [13:52<9:05:28, 12.99it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63051/488281 [13:52<4:52:23, 24.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63079/488281 [13:52<2:49:24, 41.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63107/488281 [13:53<1:49:31, 64.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.03947448730469, 'l2_loss': 66.88873291015625, 'l1_loss': 57.15073776245117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 63135/488281 [13:53<1:20:14, 88.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63163/488281 [13:53<1:05:54, 107.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63191/488281 [13:53<1:06:54, 105.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63219/488281 [13:53<59:23, 119.28it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.673583984375, 'l2_loss': 67.82383728027344, 'l1_loss': 57.8497428894043}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 13%|█▎        | 63247/488281 [13:54<55:40, 127.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63275/488281 [13:54<53:51, 131.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63303/488281 [13:54<53:00, 133.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.19868469238281, 'l2_loss': 66.51895141601562, 'l1_loss': 57.67972946166992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 63331/488281 [13:54<52:33, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63359/488281 [13:54<52:19, 135.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63373/488281 [13:55<1:03:33, 111.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63401/488281 [13:55<57:45, 122.61it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27951049804688, 'l2_loss': 67.00677490234375, 'l1_loss': 57.27273941040039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 63429/488281 [13:55<54:51, 129.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63457/488281 [13:55<53:27, 132.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63485/488281 [13:55<52:45, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63513/488281 [13:56<52:26, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.61524963378906, 'l2_loss': 66.81246185302734, 'l1_loss': 57.80278396606445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 63541/488281 [13:56<52:16, 135.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63569/488281 [13:56<1:00:28, 117.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63597/488281 [13:56<56:11, 125.96it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.28334045410156, 'l2_loss': 67.13131713867188, 'l1_loss': 57.15202331542969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 63625/488281 [13:57<54:07, 130.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63653/488281 [13:57<53:04, 133.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63681/488281 [13:57<52:33, 134.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63709/488281 [13:57<52:20, 135.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.09327697753906, 'l2_loss': 67.44947052001953, 'l1_loss': 57.6438102722168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 13%|█▎        | 63737/488281 [13:57<52:12, 135.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63765/488281 [13:58<1:00:03, 117.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63793/488281 [13:58<55:58, 126.39it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63821/488281 [13:58<53:59, 131.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4233169555664, 'l2_loss': 67.12986755371094, 'l1_loss': 57.29344940185547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 13%|█▎        | 63849/488281 [13:58<52:59, 133.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63877/488281 [13:59<52:30, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 63905/488281 [13:59<52:17, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.355712890625, 'l2_loss': 66.92312622070312, 'l1_loss': 57.43258285522461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 13%|█▎        | 63933/488281 [13:59<52:10, 135.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63961/488281 [13:59<1:00:03, 117.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 63989/488281 [13:59<55:57, 126.37it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.27it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A
 13%|█▎        | 64002/488281 [14:04<12:34:19,  9.37it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64030/488281 [14:04<6:32:20, 18.02it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64044/488281 [14:04<4:49:30, 24.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64072/488281 [14:05<2:48:13, 42.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64100/488281 [14:05<1:48:52, 64.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.77179718017578, 'l2_loss': 67.13141632080078, 'l1_loss': 57.640380859375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 13%|█▎        | 64128/488281 [14:05<1:31:10, 77.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64156/488281 [14:05<1:11:13, 99.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64184/488281 [14:05<1:01:26, 115.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64212/488281 [14:06<56:39, 124.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.70948791503906, 'l2_loss': 67.02123260498047, 'l1_loss': 57.68825912475586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 64240/488281 [14:06<54:17, 130.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64268/488281 [14:06<53:07, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64296/488281 [14:06<52:33, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64310/488281 [14:06<52:26, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.85760498046875, 'l2_loss': 67.47061920166016, 'l1_loss': 57.38698959350586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64338/488281 [14:07<1:00:06, 117.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64366/488281 [14:07<55:58, 126.24it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64394/488281 [14:07<53:56, 130.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97789764404297,

 13%|█▎        | 64422/488281 [14:07<52:57, 133.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64450/488281 [14:07<52:27, 134.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64478/488281 [14:08<52:12, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64506/488281 [14:08<52:08, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.67929077148438, 'l2_loss': 67.07714080810547, 'l1_loss': 57.602149963378906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64534/488281 [14:08<59:59, 117.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64562/488281 [14:08<55:53, 126.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64590/488281 [14:09<54:02, 130.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64618/488281 [14:09<52:59, 133.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97299194335938, 'l2_loss': 66.58663940429688, 'l1_loss': 57.3863525390625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 13%|█▎        | 64646/488281 [14:09<52:28, 134.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64674/488281 [14:09<52:13, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64702/488281 [14:09<52:07, 135.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.19110107421875, 'l2_loss': 66.78446960449219, 'l1_loss': 57.4066276550293}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64716/488281 [14:10<1:03:31, 111.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64744/488281 [14:10<57:37, 122.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64772/488281 [14:10<54:43, 128.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64800/488281 [14:10<53:17, 132.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13404846191406, 'l2_loss': 67.33089447021484, 'l1_loss': 56.80315017700195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 64828/488281 [14:10<52:37, 134.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64856/488281 [14:11<52:16, 135.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64884/488281 [14:11<52:06, 135.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 64912/488281 [14:11<59:58, 117.66it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34649658203125, 'l2_loss': 66.88095092773438, 'l1_loss': 57.465545654296875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 13%|█▎        | 64940/488281 [14:11<55:52, 126.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64968/488281 [14:12<53:51, 130.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 64996/488281 [14:12<52:52, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.04362487792969, 'l2_loss': 67.28584289550781, 'l1_loss': 57.75777816772461}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
act


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 13%|█▎        | 65024/488281 [14:16<8:48:58, 13.34it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65052/488281 [14:17<4:45:40, 24.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65080/488281 [14:17<2:46:50, 42.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65107/488281 [14:17<2:06:18, 55.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.00218200683594, 'l2_loss': 66.75253295898438, 'l1_loss': 57.2496452331543}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 13%|█▎        | 65135/488281 [14:17<1:28:04, 80.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65163/488281 [14:18<1:09:33, 101.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65191/488281 [14:18<1:00:31, 116.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.68434143066406, 'l2_loss': 67.09402465820312, 'l1_loss': 57.59031677246094}
acts.shape=

 13%|█▎        | 65219/488281 [14:18<56:07, 125.64it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65247/488281 [14:18<53:57, 130.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65275/488281 [14:18<52:53, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65303/488281 [14:19<1:02:18, 113.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.88093566894531, 'l2_loss': 67.3763656616211, 'l1_loss': 57.50456619262695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 13%|█▎        | 65331/488281 [14:19<56:58, 123.72it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65359/488281 [14:19<54:23, 129.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65387/488281 [14:19<53:06, 132.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65415/488281 [14:20<52:29, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.62645721435547, 'l2_loss': 67.88691711425781, 'l1_loss': 57.739540100097656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 13%|█▎        | 65443/488281 [14:20<52:08, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65471/488281 [14:20<52:00, 135.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65485/488281 [14:20<1:05:05, 108.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65513/488281 [14:20<58:28, 120.50it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.12305450439453, 'l2_loss': 67.55839538574219, 'l1_loss': 57.564659118652344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 13%|█▎        | 65541/488281 [14:21<55:11, 127.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65569/488281 [14:21<53:36, 131.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65597/488281 [14:21<53:01, 132.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.43504333496094, 'l2_loss': 66.93549346923828, 'l1_loss': 57.499549865722656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 13%|█▎        | 65625/488281 [14:21<52:33, 134.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65653/488281 [14:21<52:18, 134.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65681/488281 [14:22<1:00:25, 116.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65709/488281 [14:22<56:11, 125.35it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.64126586914062, 'l2_loss': 66.97016906738281, 'l1_loss': 57.67109298706055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 13%|█▎        | 65737/488281 [14:22<54:05, 130.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65765/488281 [14:22<53:01, 132.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65793/488281 [14:23<52:30, 134.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.95573425292969,

 13%|█▎        | 65821/488281 [14:23<52:17, 134.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 13%|█▎        | 65849/488281 [14:23<52:08, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65877/488281 [14:23<59:59, 117.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 13%|█▎        | 65905/488281 [14:23<55:56, 125.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.0721435546875, 'l2_loss': 67.36749267578125, 'l1_loss': 57.704654693603516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▎        | 65933/488281 [14:24<53:57, 130.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 65961/488281 [14:24<52:55, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 65989/488281 [14:24<52:27, 134.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.88it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 14%|█▎        | 66017/488281 [14:29<8:49:50, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66045/488281 [14:29<4:46:09, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66072/488281 [14:29<2:56:11, 39.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66100/488281 [14:29<1:52:38, 62.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.54051208496094, 'l2_loss': 66.69938659667969, 'l1_loss': 56.841121673583984}
acts.shape=torch.Size([4096, 4096])
acts.shape

 14%|█▎        | 66128/488281 [14:30<1:21:40, 86.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66156/488281 [14:30<1:06:27, 105.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66184/488281 [14:30<59:04, 119.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66212/488281 [14:30<55:27, 126.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.56965637207031, 'l2_loss': 66.5654296875, 'l1_loss': 57.00423049926758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 14%|█▎        | 66226/488281 [14:30<54:24, 129.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66254/488281 [14:31<1:01:11, 114.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66282/488281 [14:31<56:28, 124.53it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66310/488281 [14:31<54:13, 129.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.45474243164062, 'l2_loss': 67.50538635253906, 'l1_loss': 57.9493522644043}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 14%|█▎        | 66338/488281 [14:31<53:02, 132.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66366/488281 [14:32<52:29, 133.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66394/488281 [14:32<52:13, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20999908447266, 'l2_loss': 67.1463623046875, 'l1_loss': 57.063636779785156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▎        | 66422/488281 [14:32<52:07, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66450/488281 [14:32<59:50, 117.47it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66478/488281 [14:32<55:48, 125.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66506/488281 [14:33<53:52, 130.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.25537109375, 'l2_loss': 67.07797241210938, 'l1_loss': 57.17739486694336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 14%|█▎        | 66534/488281 [14:33<52:53, 132.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66562/488281 [14:33<52:22, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66590/488281 [14:33<52:09, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66618/488281 [14:33<52:16, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.37406921386719, 'l2_loss': 67.26395416259766, 'l1_loss': 57.110111236572266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66646/488281 [14:34<1:00:14, 116.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66674/488281 [14:34<55:59, 125.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66702/488281 [14:34<53:56, 130.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91024780273438, 'l2_loss': 66.74153137207031, 'l1_loss': 57.16871643066406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▎        | 66730/488281 [14:34<52:52, 132.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66758/488281 [14:35<52:25, 134.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66786/488281 [14:35<52:11, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66814/488281 [14:35<52:05, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.16851806640625, 'l2_loss': 67.65594482421875, 'l1_loss': 57.512577056884766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66828/488281 [14:35<1:03:07, 111.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 66856/488281 [14:35<57:23, 122.38it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66884/488281 [14:36<54:33, 128.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66912/488281 [14:36<53:13, 131.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.70738220214844, 'l2_loss': 67.42469024658203, 'l1_loss': 57.28268814086914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▎        | 66940/488281 [14:36<52:33, 133.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66968/488281 [14:36<52:12, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 66996/488281 [14:36<52:03, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.83135986328125, 'l2_loss': 66.81010437011719, 'l1_loss': 57.02125930786133}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.879379987716675 4.3118605


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.28it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 14%|█▎        | 67010/488281 [14:41<12:20:55,  9.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▎        | 67038/488281 [14:41<6:29:30, 18.02it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 67066/488281 [14:41<3:37:16, 32.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▎        | 67094/488281 [14:42<2:12:53, 52.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.18212890625, 'l2_loss': 67.1597671508789, 'l1_loss': 57.02235794067383}
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 14%|█▎        | 67122/488281 [14:42<1:31:54, 76.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 67150/488281 [14:42<1:11:28, 98.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67178/488281 [14:42<1:01:27, 114.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67192/488281 [14:42<58:34, 119.80it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12258911132812, 'l2_loss': 67.06571197509766, 'l1_loss': 57.0568733215332}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 67220/488281 [14:43<1:03:27, 110.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67248/488281 [14:43<57:31, 121.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67276/488281 [14:43<54:36, 128.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67304/488281 [14:43<53:11, 131.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.87570190429688, 'l2_loss': 66.4728775024414, 'l1_loss': 57.40282440185547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 14%|█▍        | 67332/488281 [14:44<52:29, 133.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67360/488281 [14:44<52:09, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67388/488281 [14:44<51:57, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 14%|█▍        | 67416/488281 [14:44<59:38, 117.59it/s]  


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.18515014648438, 'l2_loss': 66.88272094726562, 'l1_loss': 57.30242919921875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 14%|█▍        | 67444/488281 [14:44<55:38, 126.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67472/488281 [14:45<53:42, 130.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67500/488281 [14:45<52:41, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.95701599121094,

 14%|█▍        | 67528/488281 [14:45<52:15, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67556/488281 [14:45<52:02, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67570/488281 [14:45<51:58, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 67598/488281 [14:46<59:38, 117.56it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89938354492188, 'l2_loss': 66.67143249511719, 'l1_loss': 57.22795486450195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 67626/488281 [14:46<55:38, 126.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67654/488281 [14:46<53:47, 130.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67682/488281 [14:46<52:44, 132.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67710/488281 [14:46<52:15, 134.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.10453796386719, 'l2_loss': 66.8960189819336, 'l1_loss': 57.20852279663086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 14%|█▍        | 67738/488281 [14:47<52:00, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67766/488281 [14:47<51:54, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 67794/488281 [14:47<59:45, 117.26it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.06816101074219, 'l2_loss': 67.35035705566406, 'l1_l

 14%|█▍        | 67822/488281 [14:47<55:42, 125.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67850/488281 [14:48<53:41, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67878/488281 [14:48<52:41, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 67906/488281 [14:48<52:15, 134.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12899780273438, 'l2_loss': 66.644287109375, 'l1_loss': 57.484710693359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 14%|█▍        | 67934/488281 [14:48<52:19, 133.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 67962/488281 [14:48<52:18, 133.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 67990/488281 [14:49<1:00:04, 116.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.40it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.83it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 14%|█▍        | 68017/488281 [14:53<8:58:05, 13.02it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68045/488281 [14:54<4:48:10, 24.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68073/488281 [14:54<2:47:08, 41.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68101/488281 [14:54<1:48:11, 64.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89112854003906, 'l2_loss': 66.70764923095703, 'l1_loss': 57.183475494384766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 14%|█▍        | 68129/488281 [14:54<1:19:21, 88.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68157/488281 [14:54<1:05:33, 106.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68185/488281 [14:55<1:06:17, 105.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68213/488281 [14:55<58:53, 118.89it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.98296356201172, 'l2_loss': 67.03911590576172, 'l1_loss': 56.94384765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 14%|█▍        | 68241/488281 [14:55<55:13, 126.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68269/488281 [14:55<53:23, 131.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68297/488281 [14:56<52:30, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68325/488281 [14:56<52:06, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.19000244140625, 'l2_loss': 67.11626434326172, 'l1_loss': 57.07373809814453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 68339/488281 [14:56<51:59, 134.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68367/488281 [14:56<59:32, 117.53it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68395/488281 [14:56<55:33, 125.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.41007995605469, 'l2_loss': 66.98483276367188, 'l1_loss': 57.42524719238281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 68423/488281 [14:57<53:34, 130.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68451/488281 [14:57<52:35, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68479/488281 [14:57<52:06, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68507/488281 [14:57<51:55, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74151611328125, 'l2_loss': 67.57192993164062, 'l1_loss': 57.16958999633789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 68535/488281 [14:57<51:46, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68563/488281 [14:58<59:28, 117.63it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68591/488281 [14:58<55:29, 126.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68619/488281 [14:58<53:33, 130.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.35985565185547, 'l2_loss': 66.57510375976562, 'l1_loss': 56.784751892089844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 14%|█▍        | 68647/488281 [14:58<52:32, 133.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68675/488281 [14:59<52:14, 133.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68703/488281 [14:59<51:57, 134.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91873168945312, 'l2_loss': 66.99694061279297, 'l1_loss': 56.921791076660156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 14%|█▍        | 68731/488281 [14:59<51:49, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68759/488281 [14:59<59:34, 117.37it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68787/488281 [14:59<55:29, 125.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68815/488281 [15:00<53:32, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.914794921875, 'l2_loss': 66.99494934082031, 'l1_loss': 56.91984176635742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 14%|█▍        | 68843/488281 [15:00<52:34, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68871/488281 [15:00<52:05, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68899/488281 [15:00<51:51, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.90349578857422,

 14%|█▍        | 68927/488281 [15:00<51:44, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68941/488281 [15:01<1:02:51, 111.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 68969/488281 [15:01<57:07, 122.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 68997/488281 [15:01<54:18, 128.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27241516113281, 'l2_loss': 66.93504333496094, 'l1_loss': 57.33737564086914}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9542250633239746 4.379071


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 17.41it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.50it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 14%|█▍        | 69025/488281 [15:06<8:46:08, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69053/488281 [15:06<4:44:06, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69081/488281 [15:06<2:45:31, 42.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69109/488281 [15:06<1:47:27, 65.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.38397979736328, 'l2_loss': 67.11658477783203, 'l1_loss': 57.26739501953125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 69123/488281 [15:07<1:45:53, 65.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69151/488281 [15:07<1:18:12, 89.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69179/488281 [15:07<1:04:48, 107.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69207/488281 [15:07<58:07, 120.18it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.6798324584961, 'l2_loss': 67.43358612060547, 'l1_loss': 57.246246337890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 69235/488281 [15:07<54:47, 127.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69263/488281 [15:08<53:10, 131.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69291/488281 [15:08<52:23, 133.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69305/488281 [15:08<52:11, 133.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.52023315429688, 'l2_loss': 66.88908386230469, 'l1_loss': 57.63114547729492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69333/488281 [15:08<1:00:14, 115.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69361/488281 [15:08<55:48, 125.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69389/488281 [15:09<53:37, 130.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69417/488281 [15:09<52:34, 132.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.43630981445312, 'l2_loss': 67.11711120605469, 'l1_loss': 57.31919479370117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 69445/488281 [15:09<52:02, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69473/488281 [15:09<51:46, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69501/488281 [15:09<51:41, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.28860473632812, 'l2_loss': 67.21145629882812, 'l1_loss': 57.077144622802734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69529/488281 [15:10<59:57, 116.41it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69557/488281 [15:10<55:39, 125.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69585/488281 [15:10<53:33, 130.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69613/488281 [15:10<52:33, 132.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.83831787109375, 'l2_loss': 67.077392578125, 'l1_loss': 57.760929107666016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 14%|█▍        | 69641/488281 [15:11<52:01, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69669/488281 [15:11<51:46, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69683/488281 [15:11<52:01, 134.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69711/488281 [15:11<59:40, 116.90it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.84445190429688, 'l2_loss': 66.76390075683594, 'l1_loss': 57.08054733276367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 69739/488281 [15:11<55:30, 125.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69767/488281 [15:12<53:27, 130.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69795/488281 [15:12<52:27, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8431396484375, 'l2_loss': 66.92985534667969, 'l1_loss': 56.91328430175781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 14%|█▍        | 69823/488281 [15:12<51:58, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69851/488281 [15:12<51:43, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69879/488281 [15:12<51:36, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 69907/488281 [15:13<59:14, 117.70it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.28266906738281, 'l2_loss': 67.02593994140625, 'l1_loss': 57.25672912597656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 69935/488281 [15:13<55:17, 126.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69963/488281 [15:13<53:20, 130.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 69991/488281 [15:13<52:22, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 14%|█▍        | 70019/488281 [15:18<8:44:35, 13.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70047/488281 [15:18<4:43:18, 24.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70075/488281 [15:18<2:45:03, 42.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70102/488281 [15:19<1:55:44, 60.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.99767303466797, 'l2_loss': 66.721923828125, 'l1_loss': 57.27574920654297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 14%|█▍        | 70130/488281 [15:19<1:22:41, 84.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70158/488281 [15:19<1:06:41, 104.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70186/488281 [15:19<59:14, 117.62it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70214/488281 [15:20<55:16, 126.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.0182113647461, 'l2_loss': 67.11846923828125, 'l1_loss': 56.899742126464844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 70242/488281 [15:20<53:22, 130.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70270/488281 [15:20<52:24, 132.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70284/488281 [15:20<1:03:24, 109.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70312/488281 [15:20<57:20, 121.47it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.64508056640625, 'l2_loss': 66.88813781738281, 'l1_loss': 56.75693893432617}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 70340/488281 [15:21<54:21, 128.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70368/488281 [15:21<52:52, 131.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70396/488281 [15:21<52:08, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.66515350341797, 'l2_loss': 67.18122863769531, 'l1_loss': 57.483924865722656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 14%|█▍        | 70424/488281 [15:21<51:48, 134.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70452/488281 [15:21<51:37, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70480/488281 [15:22<59:24, 117.21it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70508/488281 [15:22<55:21, 125.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35026550292969, 'l2_loss': 66.989990234375, 'l1_loss': 57.36027526855469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 14%|█▍        | 70536/488281 [15:22<53:19, 130.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70564/488281 [15:22<52:20, 133.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70592/488281 [15:22<51:51, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74948120117188,

 14%|█▍        | 70620/488281 [15:23<51:39, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70648/488281 [15:23<51:30, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70676/488281 [15:23<59:12, 117.56it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 14%|█▍        | 70704/488281 [15:23<55:26, 125.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05332946777344, 'l2_loss': 66.82260131835938, 'l1_loss': 57.23073196411133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 14%|█▍        | 70732/488281 [15:24<53:22, 130.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70760/488281 [15:24<52:21, 132.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 14%|█▍        | 70788/488281 [15:24<51:52, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 70816/488281 [15:24<51:36, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.90626525878906, 'l2_loss': 66.80244445800781, 'l1_loss': 57.103824615478516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▍        | 70844/488281 [15:24<51:30, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 70872/488281 [15:25<59:15, 117.40it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 70900/488281 [15:25<55:13, 125.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.90927124023438,

 15%|█▍        | 70928/488281 [15:25<53:17, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 70956/488281 [15:25<52:16, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 70984/488281 [15:26<51:48, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 70998/488281 [15:26<51:40, 134.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8668212890625, 'l2_loss': 66.7596435546875, 'l1_loss': 57.107177734375}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.891437530517578 4.317140579223633 8.948667526245117
91.58%
Reconstruction: (0.9158228814853787, 3.891437530517578, 4.317140579223633, 8.948667526245117)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.40it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 15%|█▍        | 71026/488281 [15:30<8:41:19, 13.34it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71040/488281 [15:31<6:31:13, 17.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71068/488281 [15:31<3:37:51, 31.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71096/488281 [15:31<2:12:54, 52.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.33015441894531, 'l2_loss': 67.16613006591797, 'l1_loss': 57.16402816772461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 71124/488281 [15:31<1:31:20, 76.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71152/488281 [15:31<1:10:57, 97.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71180/488281 [15:32<1:00:56, 114.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71208/488281 [15:32<56:25, 123.19it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4378433227539, 'l2_loss': 67.30805206298828, 'l1_loss': 57.129791259765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 71222/488281 [15:32<54:53, 126.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71250/488281 [15:32<1:05:42, 105.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71278/488281 [15:32<58:21, 119.11it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71306/488281 [15:33<54:46, 126.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34722900390625, 'l2_loss': 67.10873413085938, 'l1_loss': 57.238494873046875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▍        | 71334/488281 [15:33<53:00, 131.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71362/488281 [15:33<52:08, 133.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71390/488281 [15:33<51:43, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71418/488281 [15:33<51:31, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.82778930664062, 'l2_loss': 67.14646911621094, 'l1_loss': 57.68131637573242}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71446/488281 [15:34<59:08, 117.46it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71474/488281 [15:34<55:07, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71502/488281 [15:34<53:11, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20738220214844, 'l2_loss': 67.02764129638672, 'l1_loss': 57.17974090576172}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 71530/488281 [15:34<52:13, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71558/488281 [15:35<51:45, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71586/488281 [15:35<51:32, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71614/488281 [15:35<51:25, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.85159301757812, 'l2_loss': 67.16087341308594, 'l1_loss': 57.69071960449219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71628/488281 [15:35<1:02:20, 111.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71656/488281 [15:35<56:41, 122.48it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71684/488281 [15:36<53:54, 128.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71712/488281 [15:36<52:33, 132.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.80028533935547, 'l2_loss': 67.45066833496094, 'l1_loss': 57.34961700439453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 71740/488281 [15:36<52:02, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71768/488281 [15:36<51:38, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71796/488281 [15:36<51:27, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.70927429199219, 'l2_loss': 67.25867462158203, 'l1_loss': 57.45060348510742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71824/488281 [15:37<59:09, 117.33it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 71852/488281 [15:37<55:07, 125.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71880/488281 [15:37<53:10, 130.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71908/488281 [15:37<52:12, 132.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.46142578125, 'l2_loss': 66.43498229980469, 'l1_loss': 57.02644348144531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 15%|█▍        | 71936/488281 [15:38<51:41, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71964/488281 [15:38<51:28, 134.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 71992/488281 [15:38<51:21, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 52
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 12.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.92it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.91it/s]
 15%|█▍        | 72006/488281 [15:43<12:24:42,  9.32it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72034/488281 [15:43<6:31:02, 17.74it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72062/488281 [15:43<3:37:41, 31.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72090/488281 [15:43<2:12:45, 52.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72118/488281 [15:44<1:31:09, 76.09it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.09043884277344, 'l2_loss': 67.27877807617188, 'l1_loss': 56.81166458129883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 72146/488281 [15:44<1:10:45, 98.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72174/488281 [15:44<1:00:47, 114.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72188/488281 [15:44<57:54, 119.77it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72216/488281 [15:44<1:06:27, 104.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.94393157958984, 'l2_loss': 66.75267028808594, 'l1_loss': 57.191261291503906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▍        | 72244/488281 [15:45<58:39, 118.21it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72272/488281 [15:45<54:49, 126.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72300/488281 [15:45<52:57, 130.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.70550537109375, 'l2_loss': 66.75920104980469, 'l1_loss': 56.94630432128906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 72328/488281 [15:45<52:03, 133.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72356/488281 [15:45<51:34, 134.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72370/488281 [15:45<51:26, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72398/488281 [15:46<59:08, 117.19it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.26422882080078, 'l2_loss': 67.0440902709961, 'l1_loss': 57.22013854980469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 15%|█▍        | 72426/488281 [15:46<55:04, 125.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72454/488281 [15:46<53:03, 130.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72482/488281 [15:46<52:04, 133.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72510/488281 [15:47<51:36, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.76590728759766, 'l2_loss': 67.48393249511719, 'l1_loss': 57.28197479248047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 72538/488281 [15:47<51:22, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72566/488281 [15:47<51:15, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72594/488281 [15:47<58:56, 117.53it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.61972045898438, 'l2_loss': 67.34807586669922, 'l1_l

 15%|█▍        | 72622/488281 [15:47<54:59, 125.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72650/488281 [15:48<53:00, 130.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72678/488281 [15:48<52:01, 133.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72706/488281 [15:48<51:35, 134.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.29854583740234, 'l2_loss': 66.41465759277344, 'l1_loss': 56.883888244628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▍        | 72734/488281 [15:48<51:20, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72762/488281 [15:49<51:22, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72790/488281 [15:49<59:01, 117.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72818/488281 [15:49<54:59, 125.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.60843658447266, 'l2_loss': 66.87853240966797, 'l1_loss': 56.72990417480469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 72846/488281 [15:49<52:59, 130.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72874/488281 [15:49<52:01, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72902/488281 [15:50<51:35, 134.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91152954101562, 'l2_loss': 66.79999542236328, 'l1_loss': 57.11153793334961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▍        | 72930/488281 [15:50<51:20, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 72958/488281 [15:50<51:13, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 72972/488281 [15:50<1:02:26, 110.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 73000/488281 [15:50<56:37, 122.25it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.86277770996094, 'l2_loss': 66.48092651367188, 'l1_loss': 57.3818473815918}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8725616931915283 4.30181360244751 8.934597969055176
91.52%
Reconstruction: (0.9152017318993342, 3.8725616931915283, 4.30181360244751, 8.934597969055176)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.24it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 15%|█▍        | 73027/488281 [15:55<8:48:27, 13.10it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 73055/488281 [15:55<4:43:10, 24.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 73083/488281 [15:56<2:44:21, 42.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 73111/488281 [15:56<1:46:30, 64.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.84220886230469, 'l2_loss': 67.3678970336914, 'l1_loss': 57.47431564331055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 15%|█▍        | 73139/488281 [15:56<1:18:13, 88.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 73153/488281 [15:56<1:20:48, 85.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▍        | 73181/488281 [15:56<1:05:38, 105.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▍        | 73209/488281 [15:57<58:12, 118.85it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.25310516357422, 'l2_loss': 67.18722534179688, 'l1_loss': 57.065879821777344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▍        | 73237/488281 [15:57<54:47, 126.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73265/488281 [15:57<52:53, 130.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73293/488281 [15:57<51:55, 133.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73321/488281 [15:57<51:29, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.88176727294922, 'l2_loss': 67.39352416992188, 'l1_loss': 57.488243103027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▌        | 73335/488281 [15:57<51:22, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 15%|█▌        | 73363/488281 [15:58<58:59, 117.23it/s]  


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73391/488281 [15:58<54:54, 125.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73419/488281 [15:58<52:57, 130.56it/s]

{'loss': 124.70770263671875, 'l2_loss': 67.14933013916016, 'l1_loss': 57.55837631225586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 73447/488281 [15:58<51:57, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73475/488281 [15:59<51:26, 134.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73503/488281 [15:59<51:15, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4466781616211, 'l2_loss': 67.20744323730469, 'l1_loss': 57.239234924316406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 73531/488281 [15:59<51:08, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73559/488281 [15:59<58:42, 117.74it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73587/488281 [15:59<54:45, 126.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73615/488281 [16:00<52:51, 130.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.89535522460938, 'l2_loss': 67.57861328125, 'l1_loss': 57.31673812866211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 15%|█▌        | 73643/488281 [16:00<51:53, 133.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73671/488281 [16:00<51:25, 134.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73699/488281 [16:00<51:12, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.551025390625, '

 15%|█▌        | 73727/488281 [16:01<51:07, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73741/488281 [16:01<1:02:01, 111.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73769/488281 [16:01<56:34, 122.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73797/488281 [16:01<53:42, 128.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56562805175781, 'l2_loss': 67.238525390625, 'l1_loss': 57.32710647583008}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 15%|█▌        | 73825/488281 [16:01<52:19, 132.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73853/488281 [16:02<51:38, 133.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73881/488281 [16:02<51:17, 134.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73909/488281 [16:02<51:09, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.05498504638672, 'l2_loss': 67.63250732421875, 'l1_loss': 57.42247772216797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73937/488281 [16:02<58:49, 117.38it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 73965/488281 [16:02<54:49, 125.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 73993/488281 [16:03<52:52, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 53
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.31it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 15%|█▌        | 74021/488281 [16:07<8:40:10, 13.27it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74049/488281 [16:08<4:40:51, 24.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74077/488281 [16:08<2:43:36, 42.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74105/488281 [16:08<1:46:10, 65.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.58444213867188, 'l2_loss': 67.23811340332031, 'l1_loss': 57.3463249206543}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74133/488281 [16:08<1:25:32, 80.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74161/488281 [16:08<1:07:54, 101.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74189/488281 [16:09<59:13, 116.54it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74217/488281 [16:09<55:01, 125.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11087036132812, 'l2_loss': 66.77449035644531, 'l1_loss': 57.33637619018555}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 74245/488281 [16:09<52:55, 130.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74273/488281 [16:09<52:05, 132.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74301/488281 [16:10<51:32, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11724853515625, 'l2_loss': 67.29485321044922, 'l1_loss': 56.822391510009766}
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▌        | 74329/488281 [16:10<59:10, 116.60it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74357/488281 [16:10<54:56, 125.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74385/488281 [16:10<52:52, 130.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74413/488281 [16:10<51:54, 132.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.62478637695312, 'l2_loss': 66.62632751464844, 'l1_loss': 56.99845504760742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 74441/488281 [16:11<51:23, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74469/488281 [16:11<51:10, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74483/488281 [16:11<51:05, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74511/488281 [16:11<1:05:20, 105.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.24382781982422, 'l2_loss': 67.2310791015625, 'l1_loss': 57.01274871826172}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 15%|█▌        | 74539/488281 [16:11<57:57, 118.96it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74567/488281 [16:12<54:21, 126.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74595/488281 [16:12<52:34, 131.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.67890930175781, 'l2_loss': 67.48007202148438, 'l1_loss': 57.1988410949707}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 15%|█▌        | 74623/488281 [16:12<51:43, 133.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74651/488281 [16:12<51:16, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74679/488281 [16:13<51:05, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74707/488281 [16:13<1:00:37, 113.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9945068359375, 'l2_loss': 66.97196960449219, 'l1_loss': 57.02254104614258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 15%|█▌        | 74735/488281 [16:13<55:39, 123.84it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74763/488281 [16:13<53:12, 129.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74791/488281 [16:13<52:11, 132.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74819/488281 [16:14<51:31, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.50304412841797, 'l2_loss': 66.92941284179688, 'l1_loss': 56.573631286621094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 15%|█▌        | 74847/488281 [16:14<51:10, 134.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74875/488281 [16:14<51:01, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 74903/488281 [16:14<59:16, 116.22it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.54492950439453, 'l2_loss': 66.88526153564453, 'l1_loss': 56.65966796875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 15%|█▌        | 74931/488281 [16:15<54:59, 125.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74959/488281 [16:15<52:52, 130.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 74987/488281 [16:15<51:49, 132.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A
 15%|█▌        | 75001/488281 [16:20<12:07:36,  9.47it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75029/488281 [16:20<6:22:26, 18.01it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75057/488281 [16:20<3:33:19, 32.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75071/488281 [16:20<2:44:34, 41.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75098/488281 [16:20<1:57:13, 58.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.62750244140625,

 15%|█▌        | 75126/488281 [16:21<1:23:07, 82.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75154/488281 [16:21<1:06:34, 103.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75182/488281 [16:21<58:31, 117.63it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75210/488281 [16:21<54:36, 126.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.6527099609375, 'l2_loss': 67.37738037109375, 'l1_loss': 57.27532958984375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 15%|█▌        | 75238/488281 [16:21<52:40, 130.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75252/488281 [16:22<52:06, 132.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75280/488281 [16:22<1:00:39, 113.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75308/488281 [16:22<55:37, 123.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4907455444336, 'l2_loss': 67.44579315185547, 'l1_loss': 57.044952392578125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 75336/488281 [16:22<53:09, 129.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75364/488281 [16:23<51:56, 132.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75392/488281 [16:23<51:19, 134.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.52217864990234,

 15%|█▌        | 75420/488281 [16:23<51:04, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75448/488281 [16:23<50:55, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75476/488281 [16:23<59:51, 114.94it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75504/488281 [16:24<55:16, 124.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.68843078613281, 'l2_loss': 66.74398040771484, 'l1_loss': 56.94445037841797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 75532/488281 [16:24<52:58, 129.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75560/488281 [16:24<51:51, 132.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75588/488281 [16:24<51:16, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 15%|█▌        | 75616/488281 [16:24<51:02, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.00300598144531, 'l2_loss': 67.41172790527344, 'l1_loss': 56.59128189086914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 15%|█▌        | 75644/488281 [16:25<50:54, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 15%|█▌        | 75672/488281 [16:25<59:26, 115.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 75700/488281 [16:25<55:01, 124.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.64370727539062,

 16%|█▌        | 75728/488281 [16:25<52:50, 130.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 75756/488281 [16:26<51:45, 132.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 75784/488281 [16:26<51:12, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 75812/488281 [16:26<51:21, 133.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.86662292480469, 'l2_loss': 67.46067810058594, 'l1_loss': 57.405948638916016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▌        | 75826/488281 [16:26<51:10, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 75854/488281 [16:26<59:13, 116.05it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 75882/488281 [16:27<54:52, 125.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 75910/488281 [16:27<52:47, 130.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07328033447266, 'l2_loss': 67.2716293334961, 'l1_loss': 56.80165100097656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 75938/488281 [16:27<51:43, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 75966/488281 [16:27<51:11, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 75994/488281 [16:27<50:56, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 54
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1268310546875, 'l2_loss': 67.02896118164062, 'l1_loss': 57.097869873046875}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.29it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 16%|█▌        | 76022/488281 [16:32<8:37:13, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76033/488281 [16:32<6:50:59, 16.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76047/488281 [16:32<4:57:48, 23.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76061/488281 [16:33<3:41:14, 31.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76075/488281 [16:33<2:48:55, 40.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76089/488281 [16:33<2:12:53, 51.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07759094238281, 'l2_loss': 67.06504821777344, 'l1_loss': 57.01254653930664}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76103/488281 [16:33<1:47:59, 63.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76117/488281 [16:33<1:30:41, 75.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76131/488281 [16:33<1:18:36, 87.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76145/488281 [16:33<1:10:12, 97.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76159/488281 [16:33<1:04:20, 106.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76173/488281 [16:33<1:00:14, 114.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76187/488281 [16:33<57:21, 119.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76201/488281 [16:34<55:22, 124.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.44764709472656, 'l2_loss': 67.2261962890625, 'l1_loss': 57.22145080566406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76215/488281 [16:34<53:57, 127.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76229/488281 [16:34<1:03:49, 107.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76243/488281 [16:34<59:52, 114.69it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76257/488281 [16:34<57:06, 120.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76271/488281 [16:34<55:11, 124.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76285/488281 [16:34<53:49, 127.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76299/488281 [16:34<52:52, 129.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.74639129638672, 'l2_loss': 66.78348541259766, 'l1_loss': 56.96290588378906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76313/488281 [16:34<52:36, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76327/488281 [16:35<52:02, 131.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76341/488281 [16:35<51:37, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76355/488281 [16:35<51:19, 133.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76369/488281 [16:35<51:07, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76383/488281 [16:35<51:00, 134.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76397/488281 [16:35<50:53, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76411/488281 [16:35<50:51, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.53023529052734, 'l2_loss': 67.54794311523438, 'l1_loss': 56.98229217529297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76439/488281 [16:35<58:31, 117.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76467/488281 [16:36<54:30, 125.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76495/488281 [16:36<52:32, 130.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76523/488281 [16:36<51:35, 133.02it/s]

{'loss': 123.84178924560547, 'l2_loss': 66.8953628540039, 'l1_loss': 56.94642639160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 76551/488281 [16:36<51:06, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76579/488281 [16:36<50:53, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76607/488281 [16:37<50:47, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.724365234375, 'l2_loss': 67.57025146484375, 'l1_loss': 57.154117584228516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76621/488281 [16:37<1:01:37, 111.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76649/488281 [16:37<56:00, 122.48it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76677/488281 [16:37<53:16, 128.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76705/488281 [16:37<51:56, 132.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74794006347656, 'l2_loss': 67.48721313476562, 'l1_loss': 57.2607307434082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 76733/488281 [16:38<51:15, 133.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76761/488281 [16:38<50:55, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76789/488281 [16:38<50:45, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76817/488281 [16:38<58:15, 117.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.01317596435547, 'l2_loss': 67.50487518310547, 'l1_loss': 57.50830078125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 16%|█▌        | 76845/488281 [16:39<54:29, 125.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76873/488281 [16:39<52:30, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76901/488281 [16:39<51:33, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50638580322266, 'l2_loss': 67.16059875488281, 'l1_loss': 57.345787048339844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▌        | 76929/488281 [16:39<51:03, 134.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76957/488281 [16:39<50:48, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 76985/488281 [16:40<50:42, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 76999/488281 [16:40<1:01:47, 110.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.63143920898438, 'l2_loss': 66.89106750488281, 'l1_loss': 56.7403678894043}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.885429859161377 4.321150302886963 8.964346885681152
91.42%
Reconstruction: (0.9142099700683327, 3.885429859161377, 4.321150302886963, 8.964346885681152)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 12.28it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 12.24it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 16%|█▌        | 77011/488281 [16:44<12:32:46,  9.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77025/488281 [16:45<8:55:37, 12.80it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77039/488281 [16:45<6:26:59, 17.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77053/488281 [16:45<4:44:34, 24.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77067/488281 [16:45<3:33:41, 32.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77081/488281 [16:45<2:44:24, 41.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77095/488281 [16:45<2:10:06, 52.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77109/488281 [16:45<1:46:13, 64.51it/s]

{'loss': 122.91203308105469, 'l2_loss': 66.56655883789062, 'l1_loss': 56.34547424316406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77123/488281 [16:45<1:29:30, 76.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77137/488281 [16:45<1:17:48, 88.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77151/488281 [16:45<1:09:39, 98.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77165/488281 [16:46<1:03:55, 107.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77179/488281 [16:46<59:56, 114.30it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77207/488281 [16:46<1:02:44, 109.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.86442565917969, 'l2_loss': 66.74422454833984, 'l1_loss': 57.120201110839844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▌        | 77235/488281 [16:46<56:32, 121.16it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77263/488281 [16:46<53:29, 128.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77291/488281 [16:47<51:59, 131.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77319/488281 [16:47<51:18, 133.49it/s]

{'loss': 124.31550598144531, 'l2_loss': 67.39543151855469, 'l1_loss': 56.92007064819336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▌        | 77347/488281 [16:47<51:10, 133.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77375/488281 [16:47<50:53, 134.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77389/488281 [16:47<1:01:50, 110.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.38606262207031, 'l2_loss': 67.61491394042969, 'l1_l

 16%|█▌        | 77417/488281 [16:48<56:06, 122.05it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77445/488281 [16:48<53:16, 128.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77473/488281 [16:48<51:54, 131.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77501/488281 [16:48<51:15, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.36841583251953, 'l2_loss': 67.47201538085938, 'l1_loss': 56.896400451660156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▌        | 77529/488281 [16:48<50:54, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77557/488281 [16:49<50:45, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77585/488281 [16:49<58:14, 117.52it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77613/488281 [16:49<54:21, 125.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97186279296875, 'l2_loss': 66.81404113769531, 'l1_loss': 57.1578254699707}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 77641/488281 [16:49<52:26, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77669/488281 [16:50<51:27, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77697/488281 [16:50<50:57, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.25819396972656, 'l2_loss': 67.26683044433594, 'l1_loss': 56.99135971069336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▌        | 77725/488281 [16:50<50:47, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77753/488281 [16:50<50:41, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77781/488281 [16:50<58:09, 117.65it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77809/488281 [16:51<54:16, 126.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.90542602539062, 'l2_loss': 67.2509765625, 'l1_loss': 57.65445327758789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 16%|█▌        | 77837/488281 [16:51<52:20, 130.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77865/488281 [16:51<51:32, 132.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77893/488281 [16:51<51:01, 134.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 77921/488281 [16:51<50:45, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4651107788086, 'l2_loss': 67.28437042236328, 'l1_loss': 57.18074035644531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 77949/488281 [16:52<50:36, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77977/488281 [16:52<58:16, 117.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 77991/488281 [16:52<55:57, 122.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 55
acts.shape=torch.Size([4096, 4096])
{'loss': 125.0833740234375, 'l2_loss': 67.66275024414062, 'l1_loss': 57.42061996459961}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([6


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.24it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 12.43it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 12.31it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 16%|█▌        | 78004/488281 [16:57<12:13:41,  9.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78018/488281 [16:57<8:45:36, 13.01it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78032/488281 [16:57<6:21:32, 17.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78046/488281 [16:57<4:41:28, 24.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78060/488281 [16:57<3:31:48, 32.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78074/488281 [16:57<2:43:15, 41.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78088/488281 [16:57<2:09:21, 52.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78102/488281 [16:57<1:45:42, 64.68it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.54448699951172, 'l2_loss': 67.11445617675781, 'l1_loss': 57.430030822753906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78116/488281 [16:57<1:29:07, 76.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78130/488281 [16:58<1:17:31, 88.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78144/488281 [16:58<1:20:50, 84.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78158/488281 [16:58<1:11:43, 95.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78172/488281 [16:58<1:05:21, 104.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78186/488281 [16:58<1:00:52, 112.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78200/488281 [16:58<57:44, 118.38it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78214/488281 [16:58<55:37, 122.88it/s]

{'loss': 124.24414825439453, 'l2_loss': 67.34868621826172, 'l1_loss': 56.89546203613281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78228/488281 [16:58<54:05, 126.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78242/488281 [16:59<53:00, 128.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78256/488281 [16:59<52:14, 130.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78270/488281 [16:59<51:42, 132.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78284/488281 [16:59<51:19, 133.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78298/488281 [16:59<51:03, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89673614501953, 'l2_loss': 66.96153259277344, 'l1_loss': 56.935203552246094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78312/488281 [16:59<50:54, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78326/488281 [16:59<50:47, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78354/488281 [16:59<59:04, 115.64it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78382/488281 [17:00<54:43, 124.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78410/488281 [17:00<52:36, 129.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.32051086425781, 'l2_loss': 66.81948852539062, 'l1_loss': 57.50101852416992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▌        | 78438/488281 [17:00<51:30, 132.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78466/488281 [17:00<50:56, 134.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78494/488281 [17:00<50:43, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78522/488281 [17:01<50:37, 134.90it/s]

{'loss': 124.56938171386719, 'l2_loss': 67.21043395996094, 'l1_loss': 57.358943939208984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▌        | 78550/488281 [17:01<58:44, 116.26it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78578/488281 [17:01<54:28, 125.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78606/488281 [17:01<52:26, 130.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.52763366699219, 'l2_loss': 67.45989990234375, 'l1_loss': 57.0677375793457}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 78634/488281 [17:02<51:40, 132.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78662/488281 [17:02<51:03, 133.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78690/488281 [17:02<50:45, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78718/488281 [17:02<50:36, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.82942199707031, 'l2_loss': 67.34148406982422, 'l1_loss': 57.48794174194336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▌        | 78732/488281 [17:02<1:02:11, 109.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78760/488281 [17:03<56:11, 121.48it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78788/488281 [17:03<53:15, 128.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.36444091796875,

 16%|█▌        | 78816/488281 [17:03<51:47, 131.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78844/488281 [17:03<51:04, 133.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78872/488281 [17:03<50:58, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78900/488281 [17:04<50:44, 134.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.51408386230469, 'l2_loss': 67.44254302978516, 'l1_loss': 57.0715446472168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▌        | 78928/488281 [17:04<58:49, 115.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 78956/488281 [17:04<54:32, 125.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78984/488281 [17:04<52:24, 130.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 78998/488281 [17:04<51:46, 131.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.42743682861328, 'l2_loss': 67.22212982177734, 'l1_loss': 57.20530700683594}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8236632347106934 4.260510444641113 8.851167678833008
91.31%
Reconstruction: (0.9131085382846075, 3.8236632347106934, 4.260510444641113, 8.851167678833008)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.44it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.89it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 16%|█▌        | 79026/488281 [17:09<8:32:19, 13.31it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79054/488281 [17:09<4:36:43, 24.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79082/488281 [17:10<2:41:18, 42.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79096/488281 [17:10<2:08:00, 53.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78337097167969, 'l2_loss': 67.54914855957031, 'l1_loss': 57.23421859741211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 79124/488281 [17:10<1:43:18, 66.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79152/488281 [17:10<1:16:17, 89.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79180/488281 [17:10<1:03:03, 108.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79208/488281 [17:11<56:35, 120.47it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.83872985839844, 'l2_loss': 67.28022003173828, 'l1_loss': 57.55850601196289}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▌        | 79236/488281 [17:11<53:25, 127.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79264/488281 [17:11<51:51, 131.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▌        | 79292/488281 [17:11<51:06, 133.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▌        | 79320/488281 [17:12<1:00:54, 111.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97888946533203, 'l2_loss': 66.62373352050781, 'l1_loss': 57.35515594482422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▋        | 79348/488281 [17:12<55:30, 122.78it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79376/488281 [17:12<53:10, 128.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 79404/488281 [17:12<51:43, 131.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.3603515625, 'l2_loss': 67.72259521484375, 'l1_loss': 57.63775634765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 16%|█▋        | 79432/488281 [17:12<51:00, 133.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79460/488281 [17:13<50:39, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79474/488281 [17:13<50:35, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 79502/488281 [17:13<58:52, 115.72it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.613525390625, 'l2_loss': 66.84618377685547, 'l1_loss': 56.76734161376953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 16%|█▋        | 79530/488281 [17:13<54:31, 124.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79558/488281 [17:13<52:21, 130.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79586/488281 [17:14<51:19, 132.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79614/488281 [17:14<50:51, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34159851074219, 'l2_loss': 67.30995178222656, 'l1_loss': 57.03164291381836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▋        | 79642/488281 [17:14<50:33, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79670/488281 [17:14<50:25, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 79698/488281 [17:15<58:39, 116.10it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.57167053222656, 'l2_loss': 67.47535705566406, 'l1_loss': 57.0963134765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 16%|█▋        | 79726/488281 [17:15<54:24, 125.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79754/488281 [17:15<52:18, 130.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79782/488281 [17:15<51:15, 132.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79810/488281 [17:15<50:48, 134.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.60238647460938, 'l2_loss': 67.57302856445312, 'l1_loss': 57.029354095458984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▋        | 79838/488281 [17:16<50:31, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79866/488281 [17:16<50:24, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 79894/488281 [17:16<58:48, 115.75it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 79922/488281 [17:16<54:26, 125.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.03185272216797, 'l2_loss': 66.76454162597656, 'l1_loss': 57.267311096191406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▋        | 79950/488281 [17:16<52:19, 130.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79978/488281 [17:17<51:15, 132.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 79992/488281 [17:17<50:57, 133.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 56
acts.shape=torch.Size([4096, 4096])
{'loss': 124.28672790527344, 'l2_loss': 67.00711059570312, 'l1_loss': 57.27962112426758}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.86667


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.44it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.89it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 16%|█▋        | 80020/488281 [17:21<8:32:27, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80048/488281 [17:22<4:36:41, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80075/488281 [17:22<2:54:29, 38.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 80103/488281 [17:22<1:50:31, 61.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.6387710571289, 'l2_loss': 66.91217041015625, 'l1_loss': 56.726600646972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▋        | 80131/488281 [17:22<1:19:37, 85.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80159/488281 [17:23<1:04:34, 105.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80187/488281 [17:23<57:14, 118.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80215/488281 [17:23<53:41, 126.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.84635162353516, 'l2_loss': 66.97735595703125, 'l1_loss': 56.868995666503906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 16%|█▋        | 80243/488281 [17:23<51:55, 130.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80257/488281 [17:23<1:02:05, 109.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 80285/488281 [17:24<56:02, 121.33it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80313/488281 [17:24<53:05, 128.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.04472351074219, 'l2_loss': 66.76472473144531, 'l1_loss': 57.28000259399414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 16%|█▋        | 80341/488281 [17:24<51:37, 131.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80369/488281 [17:24<50:53, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80397/488281 [17:24<50:52, 133.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.8365478515625, 'l2_loss': 67.42688751220703, 'l1_lo

 16%|█▋        | 80425/488281 [17:25<50:34, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80439/488281 [17:25<50:27, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 16%|█▋        | 80467/488281 [17:25<57:58, 117.24it/s]  


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 16%|█▋        | 80495/488281 [17:25<54:00, 125.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.7748031616211, 'l2_loss': 67.42222595214844, 'l1_loss': 57.352577209472656}
acts.shape=

 16%|█▋        | 80523/488281 [17:25<52:02, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 16%|█▋        | 80551/488281 [17:26<51:06, 132.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80579/488281 [17:26<50:37, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80607/488281 [17:26<50:24, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.61453247070312, 'l2_loss': 67.48118591308594, 'l1_loss': 57.13334274291992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 80635/488281 [17:26<50:16, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 80663/488281 [17:27<57:43, 117.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 80691/488281 [17:27<53:51, 126.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80719/488281 [17:27<52:00, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.63648986816406, 'l2_loss': 67.39202880859375, 'l1_loss': 57.24446487426758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 80747/488281 [17:27<51:01, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80775/488281 [17:27<50:34, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80803/488281 [17:28<50:24, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.5843505859375, 'l2_loss': 67.42927551269531, 'l1_loss': 57.15507125854492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 80831/488281 [17:28<50:14, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 80845/488281 [17:28<1:00:53, 111.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 80873/488281 [17:28<55:22, 122.61it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80901/488281 [17:28<52:45, 128.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.00994873046875, 'l2_loss': 67.78072357177734, 'l1_loss': 57.22922134399414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 80929/488281 [17:29<51:35, 131.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80957/488281 [17:29<50:52, 133.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80985/488281 [17:29<50:29, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 80999/488281 [17:29<50:22, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.69160461425781, 'l2_loss': 67.57142639160156, 'l1_loss': 57.120174407958984}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9621422290802 4.383300304412842 8.985384941101074
91.62%
Reconstruction: (0.9161581274333432, 3.9621422290802, 4.383300304412842, 8.985384941101074)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.47it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.90it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 17%|█▋        | 81013/488281 [17:34<11:45:27,  9.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81038/488281 [17:34<6:34:08, 17.22it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81066/488281 [17:34<3:34:41, 31.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81094/488281 [17:34<2:09:49, 52.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.19448852539062, 'l2_loss': 67.45539855957031, 'l1_loss': 57.73908996582031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 81122/488281 [17:35<1:28:58, 76.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81150/488281 [17:35<1:09:04, 98.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81178/488281 [17:35<59:24, 114.23it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81206/488281 [17:35<54:43, 123.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.73251342773438, 'l2_loss': 67.55030822753906, 'l1_loss': 57.18220901489258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81234/488281 [17:35<59:51, 113.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81262/488281 [17:36<54:53, 123.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81290/488281 [17:36<52:26, 129.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81318/488281 [17:36<51:13, 132.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.2579116821289, 'l2_loss': 67.11016082763672, 'l1_loss': 57.14775085449219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 81346/488281 [17:36<50:38, 133.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81374/488281 [17:37<50:21, 134.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81402/488281 [17:37<50:14, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.10415649414062, 'l2_loss': 67.63580322265625, 'l1_loss': 57.46834945678711}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81430/488281 [17:37<57:56, 117.03it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81458/488281 [17:37<53:56, 125.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81486/488281 [17:37<51:56, 130.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81514/488281 [17:38<50:59, 132.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.22378540039062, 'l2_loss': 67.32342529296875, 'l1_loss': 56.90036392211914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 81542/488281 [17:38<50:32, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81570/488281 [17:38<50:18, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81598/488281 [17:38<50:12, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81612/488281 [17:38<1:00:47, 111.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34208679199219, 'l2_loss': 66.83609008789062, 'l1_loss': 57.50600051879883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 81640/488281 [17:39<55:17, 122.56it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81668/488281 [17:39<52:37, 128.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81696/488281 [17:39<51:19, 132.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.6116943359375, 'l2_loss': 67.33064270019531, 'l1_loss': 57.28104782104492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 81724/488281 [17:39<50:40, 133.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81752/488281 [17:39<50:20, 134.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81780/488281 [17:40<50:13, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81808/488281 [17:40<57:42, 117.40it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.01974487304688, 'l2_loss': 67.30419921875, 'l1_loss': 56.71554946899414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 17%|█▋        | 81836/488281 [17:40<53:45, 125.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81864/488281 [17:40<51:51, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81892/488281 [17:41<50:54, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.00727844238281,

 17%|█▋        | 81920/488281 [17:41<50:27, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81948/488281 [17:41<50:23, 134.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 81976/488281 [17:41<50:10, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 81990/488281 [17:41<1:01:00, 111.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 57
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8332748413086, 'l2_loss': 66.92793273925781, 'l1_loss': 56.90534210205078}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8255984783172607 4.266841888427734 8.93031


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.78it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.77it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 17%|█▋        | 82002/488281 [17:46<12:23:46,  9.10it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82016/488281 [17:46<8:49:09, 12.80it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82030/488281 [17:46<6:22:18, 17.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82044/488281 [17:46<4:41:07, 24.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82058/488281 [17:46<3:31:03, 32.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82072/488281 [17:46<2:42:23, 41.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82086/488281 [17:47<2:08:30, 52.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82100/488281 [17:47<1:44:50, 64.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.81661987304688, 'l2_loss': 66.7889404296875, 'l1_loss': 57.027679443359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 82114/488281 [17:47<1:28:21, 76.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82128/488281 [17:47<1:16:48, 88.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82142/488281 [17:47<1:08:45, 98.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82156/488281 [17:47<1:03:07, 107.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82170/488281 [17:47<59:11, 114.34it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82184/488281 [17:47<1:07:03, 100.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82212/488281 [17:48<58:23, 115.91it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.76373291015625, 'l2_loss': 67.02582550048828, 'l1_loss': 56.737911224365234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 17%|█▋        | 82240/488281 [17:48<54:05, 125.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82268/488281 [17:48<51:59, 130.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82296/488281 [17:48<50:58, 132.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82324/488281 [17:48<50:26, 134.12it/s]

{'loss': 125.22096252441406, 'l2_loss': 67.83568572998047, 'l1_loss': 57.385276794433594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 17%|█▋        | 82352/488281 [17:49<50:13, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82366/488281 [17:49<50:08, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82394/488281 [17:49<1:00:49, 111.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82422/488281 [17:49<55:19, 122.28it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.02995300292969, 'l2_loss': 67.15026092529297, 'l1_loss': 56.87969207763672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 82450/488281 [17:49<52:45, 128.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82478/488281 [17:50<51:18, 131.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82506/488281 [17:50<50:36, 133.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.61737060546875, 'l2_loss': 67.64104461669922, 'l1_loss': 56.9763298034668}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 82534/488281 [17:50<50:16, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82548/488281 [17:50<50:11, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82576/488281 [17:50<58:25, 115.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82604/488281 [17:51<54:07, 124.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.5935287475586, 'l2_loss': 66.72268676757812, 'l1_loss': 56.87084197998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 82632/488281 [17:51<51:58, 130.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82660/488281 [17:51<50:54, 132.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82688/488281 [17:51<50:23, 134.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82716/488281 [17:52<50:10, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.14399719238281, 'l2_loss': 67.22372436523438, 'l1_loss': 56.9202766418457}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 82744/488281 [17:52<50:03, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82772/488281 [17:52<58:14, 116.04it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82800/488281 [17:52<53:57, 125.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20620727539062, 'l2_loss': 67.2272720336914, 'l1_loss': 56.97893142700195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 82828/488281 [17:52<51:55, 130.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82856/488281 [17:53<50:53, 132.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82884/488281 [17:53<50:21, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 82912/488281 [17:53<50:08, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.62042236328125, 'l2_loss': 66.74600982666016, 'l1_loss': 56.87441635131836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 82940/488281 [17:53<50:02, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82968/488281 [17:54<59:16, 113.96it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 82996/488281 [17:54<54:27, 124.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.41it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.88it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 17%|█▋        | 83023/488281 [17:59<8:39:49, 12.99it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83051/488281 [17:59<4:38:20, 24.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83079/488281 [17:59<2:41:24, 41.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83107/488281 [17:59<1:44:27, 64.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.60102844238281, 'l2_loss': 67.09916687011719, 'l1_loss': 57.50185775756836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 83135/488281 [17:59<1:16:35, 88.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83149/488281 [18:00<1:19:10, 85.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83177/488281 [18:00<1:04:13, 105.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83205/488281 [18:00<56:55, 118.62it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.17646789550781, 'l2_loss': 67.16619110107422, 'l1_loss': 57.01028060913086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 83233/488281 [18:00<53:18, 126.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83261/488281 [18:00<51:33, 130.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83289/488281 [18:01<50:40, 133.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.41767883300781,

 17%|█▋        | 83317/488281 [18:01<50:17, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83345/488281 [18:01<57:32, 117.29it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83373/488281 [18:01<53:37, 125.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83401/488281 [18:01<51:42, 130.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.7655029296875, 'l2_loss': 67.59196472167969, 'l1_loss': 57.17353439331055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 83429/488281 [18:02<50:43, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83457/488281 [18:02<50:34, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83485/488281 [18:02<50:09, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83513/488281 [18:02<50:00, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.84431457519531, 'l2_loss': 67.64592742919922, 'l1_loss': 57.19838333129883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 83541/488281 [18:03<57:31, 117.28it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83569/488281 [18:03<53:35, 125.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83597/488281 [18:03<51:39, 130.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05897521972656,

 17%|█▋        | 83625/488281 [18:03<50:43, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83653/488281 [18:03<50:13, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83681/488281 [18:04<50:00, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83709/488281 [18:04<49:57, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.917724609375, 'l2_loss': 67.27965545654297, 'l1_loss': 56.6380729675293}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83737/488281 [18:04<57:19, 117.60it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83765/488281 [18:04<53:27, 126.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83793/488281 [18:04<51:35, 130.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83821/488281 [18:05<50:39, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.26060485839844, 'l2_loss': 67.47442626953125, 'l1_loss': 56.78618240356445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 83849/488281 [18:05<50:12, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83877/488281 [18:05<49:59, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83891/488281 [18:05<49:57, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.55321502685547, 'l2_loss': 67.61555480957031, 'l1_loss': 56.937660217285156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83919/488281 [18:05<57:34, 117.07it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83947/488281 [18:06<53:34, 125.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 83975/488281 [18:06<51:51, 129.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 83989/488281 [18:06<51:16, 131.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 58
acts.shape=torch.Size([4096, 4096])
{'loss': 124.54237365722656, 'l2_loss': 67.47938537597656, 'l1_loss': 57.062992095947266}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8440399169921875 4.286786079406738 8.915327072143555
91.27%
Reconstruction: (0.9126955053285016, 3.8440399169921


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.74it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 17%|█▋        | 84017/488281 [18:11<8:28:15, 13.26it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84045/488281 [18:11<4:34:24, 24.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84073/488281 [18:11<2:39:48, 42.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84101/488281 [18:11<1:54:11, 58.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.03109741210938, 'l2_loss': 66.76136779785156, 'l1_loss': 57.26972579956055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84129/488281 [18:12<1:21:20, 82.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84157/488281 [18:12<1:05:12, 103.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84185/488281 [18:12<57:17, 117.55it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84213/488281 [18:12<53:28, 125.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11953735351562, 'l2_loss': 67.34076690673828, 'l1_loss': 56.778770446777344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 17%|█▋        | 84241/488281 [18:12<51:34, 130.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84269/488281 [18:13<50:38, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84283/488281 [18:13<50:22, 133.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84311/488281 [18:13<57:30, 117.06it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11639404296875, 'l2_loss': 67.41121673583984, 'l1_loss': 56.70517349243164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 84339/488281 [18:13<53:32, 125.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84367/488281 [18:13<51:35, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84395/488281 [18:14<50:37, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.034912109375, 'l2_loss': 67.26049041748047, 'l1_los

 17%|█▋        | 84423/488281 [18:14<50:11, 134.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84451/488281 [18:14<49:57, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84479/488281 [18:14<50:12, 134.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84493/488281 [18:14<1:01:04, 110.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.2759780883789, 'l2_loss': 67.65391540527344, 'l1_loss': 57.62206268310547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 17%|█▋        | 84521/488281 [18:15<55:19, 121.62it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84549/488281 [18:15<52:25, 128.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84577/488281 [18:15<50:59, 131.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84605/488281 [18:15<50:21, 133.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.74279022216797, 'l2_loss': 66.84707641601562, 'l1_loss': 56.895713806152344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 17%|█▋        | 84633/488281 [18:16<50:00, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84661/488281 [18:16<49:50, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84689/488281 [18:16<57:19, 117.35it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84717/488281 [18:16<53:26, 125.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56509399414062, 'l2_loss': 67.24920654296875, 'l1_loss': 57.315887451171875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 17%|█▋        | 84745/488281 [18:16<51:31, 130.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84773/488281 [18:17<50:34, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84801/488281 [18:17<50:07, 134.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50778198242188, 'l2_loss': 67.01628112792969, 'l1_loss': 57.49150466918945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 84829/488281 [18:17<49:52, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84857/488281 [18:17<49:45, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84885/488281 [18:18<57:09, 117.62it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 84913/488281 [18:18<53:20, 126.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11882019042969, 'l2_loss': 67.01153564453125, 'l1_loss': 57.10728073120117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 84941/488281 [18:18<51:26, 130.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84969/488281 [18:18<50:30, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 84997/488281 [18:18<50:13, 133.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47137451171875,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 17%|█▋        | 85025/488281 [18:23<8:24:10, 13.33it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85053/488281 [18:23<4:32:18, 24.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 85080/488281 [18:24<2:56:27, 38.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 85108/488281 [18:24<1:51:17, 60.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.76312255859375, 'l2_loss': 67.61648559570312, 'l1_loss': 57.14664077758789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 85136/488281 [18:24<1:19:41, 84.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85164/488281 [18:24<1:04:18, 104.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85192/488281 [18:24<56:47, 118.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85220/488281 [18:25<53:07, 126.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.48866271972656, 'l2_loss': 67.27961730957031, 'l1_loss': 57.20904541015625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 85234/488281 [18:25<52:03, 129.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 85262/488281 [18:25<59:52, 112.20it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 17%|█▋        | 85290/488281 [18:25<54:35, 123.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.04469299316406,

 17%|█▋        | 85318/488281 [18:25<52:02, 129.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85346/488281 [18:26<50:46, 132.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85374/488281 [18:26<50:08, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 17%|█▋        | 85402/488281 [18:26<49:51, 134.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.27894592285156, 'l2_loss': 67.7469711303711, 'l1_loss': 57.531978607177734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 17%|█▋        | 85430/488281 [18:26<49:42, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85458/488281 [18:27<58:13, 115.30it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85486/488281 [18:27<53:47, 124.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85514/488281 [18:27<51:51, 129.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.63897705078125, 'l2_loss': 67.60223388671875, 'l1_loss': 57.0367431640625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 85542/488281 [18:27<50:40, 132.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85570/488281 [18:27<50:05, 134.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85598/488281 [18:28<49:48, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.21221923828125, 'l2_loss': 67.82218933105469, 'l1_loss': 57.39003372192383}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 85626/488281 [18:28<49:40, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85654/488281 [18:28<57:41, 116.31it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85682/488281 [18:28<53:30, 125.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85710/488281 [18:29<51:30, 130.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.52366638183594, 'l2_loss': 67.06053161621094, 'l1_loss': 56.463134765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 18%|█▊        | 85738/488281 [18:29<50:29, 132.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85766/488281 [18:29<49:58, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85794/488281 [18:29<49:44, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85822/488281 [18:29<49:39, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.54190063476562, 'l2_loss': 67.60708618164062, 'l1_loss': 56.934810638427734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85836/488281 [18:30<1:01:20, 109.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 85864/488281 [18:30<55:18, 121.27it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85892/488281 [18:30<52:19, 128.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.18402099609375, 'l2_loss': 67.0653076171875, 'l1_loss': 57.11871337890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 85920/488281 [18:30<50:53, 131.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85948/488281 [18:30<50:10, 133.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85976/488281 [18:31<49:49, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 85990/488281 [18:31<49:43, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 59
acts.shape=torch.Size([4096, 4096])
{'loss': 123.73817443847656, 'l2_loss': 67.02401733398438, 'l1_loss': 56.71415710449219}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.874119281768799 4.301673889160156 8.927720069885254
91.54%
Reconstruction: (0.9153960462415724, 3.874119281768799, 4.301673889160156, 8.927720069885254)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 18%|█▊        | 86016/488281 [18:36<9:00:42, 12.40it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86044/488281 [18:36<4:44:44, 23.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86072/488281 [18:36<2:43:26, 41.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86100/488281 [18:36<1:44:57, 63.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13704681396484, 'l2_loss': 67.149169921875, 'l1_loss': 56.987876892089844}
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 86128/488281 [18:36<1:16:32, 87.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86156/488281 [18:37<1:02:39, 106.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86184/488281 [18:37<55:52, 119.95it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86198/488281 [18:37<53:54, 124.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35411071777344, 'l2_loss': 67.29908752441406, 'l1_loss': 57.055023193359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86226/488281 [18:37<58:57, 113.66it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86254/488281 [18:37<54:02, 123.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86282/488281 [18:38<51:38, 129.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86310/488281 [18:38<50:29, 132.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.13204956054688, 'l2_loss': 67.6435546875, 'l1_loss': 57.488494873046875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 18%|█▊        | 86338/488281 [18:38<49:53, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86366/488281 [18:38<49:35, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86394/488281 [18:38<49:27, 135.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86422/488281 [18:39<56:42, 118.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74706268310547, 'l2_loss': 67.54397583007812, 'l1_loss': 57.203086853027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 18%|█▊        | 86450/488281 [18:39<52:55, 126.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86478/488281 [18:39<51:05, 131.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86506/488281 [18:39<50:12, 133.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.81658935546875, 'l2_loss': 67.68783569335938, 'l1_loss': 57.12874984741211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 86534/488281 [18:39<49:52, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86562/488281 [18:40<49:35, 135.00it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86590/488281 [18:40<49:26, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86604/488281 [18:40<1:00:08, 111.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.426513671875, 'l2_loss': 67.55148315429688, 'l1_loss': 56.875030517578125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 86632/488281 [18:40<54:36, 122.57it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86660/488281 [18:41<51:53, 128.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86688/488281 [18:41<50:33, 132.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.517578125, 'l2_

 18%|█▊        | 86716/488281 [18:41<49:55, 134.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86744/488281 [18:41<49:35, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86772/488281 [18:41<49:26, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86800/488281 [18:42<56:41, 118.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.88192749023438, 'l2_loss': 67.08757019042969, 'l1_loss': 56.79436111450195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 86828/488281 [18:42<52:55, 126.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86856/488281 [18:42<51:02, 131.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86884/488281 [18:42<50:08, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86912/488281 [18:42<49:42, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.119384765625, 'l2_loss': 67.27165222167969, 'l1_loss': 56.84773254394531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 18%|█▊        | 86940/488281 [18:43<49:28, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 86968/488281 [18:43<49:22, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 86996/488281 [18:43<56:39, 118.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.92237854003906, 'l2_loss': 67.73052978515625, 'l1_l


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.40it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 18%|█▊        | 87023/488281 [18:48<8:32:46, 13.04it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87051/488281 [18:48<4:34:37, 24.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87079/488281 [18:48<2:39:15, 41.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87107/488281 [18:48<1:43:05, 64.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.41000366210938, 'l2_loss': 67.3387451171875, 'l1_loss': 57.071258544921875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 87135/488281 [18:49<1:15:35, 88.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87163/488281 [18:49<1:02:08, 107.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87191/488281 [18:49<1:02:59, 106.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87219/488281 [18:49<55:58, 119.41it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.87862396240234, 'l2_loss': 67.04911804199219, 'l1_loss': 56.829505920410156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 18%|█▊        | 87247/488281 [18:50<52:31, 127.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87275/488281 [18:50<50:51, 131.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87303/488281 [18:50<50:02, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9958724975586, 'l2_loss': 67.15498352050781, 'l1_loss': 56.84088897705078}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 87331/488281 [18:50<49:36, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87359/488281 [18:50<49:24, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87373/488281 [18:51<59:54, 111.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87401/488281 [18:51<54:28, 122.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.58796691894531, 'l2_loss': 66.65702056884766, 'l1_loss': 56.930946350097656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 18%|█▊        | 87429/488281 [18:51<51:47, 129.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87457/488281 [18:51<50:27, 132.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87485/488281 [18:51<49:48, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87513/488281 [18:52<49:31, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.85177612304688, 'l2_loss': 67.68815612792969, 'l1_loss': 57.16362380981445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 87541/488281 [18:52<49:31, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87569/488281 [18:52<56:58, 117.21it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87597/488281 [18:52<52:59, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.79119873046875, 'l2_loss': 66.88799285888672, 'l1_loss': 56.9032096862793}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 87625/488281 [18:52<51:03, 130.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87653/488281 [18:53<50:05, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87681/488281 [18:53<49:36, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87709/488281 [18:53<49:24, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.5030517578125, 'l2_loss': 67.00747680664062, 'l1_loss': 56.49557113647461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 87737/488281 [18:53<49:17, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87765/488281 [18:54<56:39, 117.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87793/488281 [18:54<52:49, 126.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87821/488281 [18:54<50:57, 130.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.76798248291016, 'l2_loss': 67.68223571777344, 'l1_loss': 57.08574676513672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 87849/488281 [18:54<50:01, 133.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87877/488281 [18:54<49:34, 134.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 87905/488281 [18:55<49:22, 135.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.50994873046875, 'l2_loss': 66.97119140625, 'l1_loss': 56.538753509521484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 18%|█▊        | 87933/488281 [18:55<49:15, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87961/488281 [18:55<56:31, 118.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 87989/488281 [18:55<52:44, 126.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.42it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 18%|█▊        | 88002/488281 [19:00<11:53:44,  9.35it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88016/488281 [19:00<8:31:17, 13.05it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88044/488281 [19:00<4:34:02, 24.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88072/488281 [19:00<2:38:55, 41.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88100/488281 [19:01<1:42:49, 64.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.14361572265625, 'l2_loss': 67.31255340576172, 'l1_loss': 56.8310661315918}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 88128/488281 [19:01<1:26:13, 77.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88156/488281 [19:01<1:07:18, 99.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88184/488281 [19:01<58:00, 114.94it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88212/488281 [19:02<53:29, 124.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.11595916748047, 'l2_loss': 67.55355834960938, 'l1_loss': 57.562400817871094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 18%|█▊        | 88240/488281 [19:02<51:15, 130.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88268/488281 [19:02<50:08, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88296/488281 [19:02<49:37, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88310/488281 [19:02<49:29, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8761215209961, 'l2_loss': 67.17967224121094, 'l1_loss': 56.696449279785156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88338/488281 [19:03<59:06, 112.76it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88366/488281 [19:03<53:59, 123.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88394/488281 [19:03<51:28, 129.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.61708068847656,

 18%|█▊        | 88422/488281 [19:03<50:16, 132.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88450/488281 [19:03<49:38, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88478/488281 [19:04<49:20, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88506/488281 [19:04<49:14, 135.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89324188232422, 'l2_loss': 66.78658294677734, 'l1_loss': 57.106658935546875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88534/488281 [19:04<57:21, 116.17it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88562/488281 [19:04<53:21, 124.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88590/488281 [19:05<51:09, 130.22it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88618/488281 [19:05<50:05, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.7098617553711, 'l2_loss': 67.33563232421875, 'l1_loss': 56.374229431152344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 88646/488281 [19:05<49:34, 134.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88674/488281 [19:05<49:18, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88702/488281 [19:05<49:12, 135.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.69519805908203, 'l2_loss': 66.80811309814453, 'l1_loss': 56.8870849609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 88716/488281 [19:06<1:01:05, 109.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88744/488281 [19:06<54:57, 121.16it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88772/488281 [19:06<51:55, 128.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88800/488281 [19:06<50:26, 131.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07377624511719, 'l2_loss': 67.22442626953125, 'l1_loss': 56.84934616088867}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 88828/488281 [19:06<49:44, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88856/488281 [19:07<49:21, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88884/488281 [19:07<49:11, 135.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 88912/488281 [19:07<57:58, 114.81it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.44723510742188, 'l2_loss': 67.3074951171875, 'l1_loss': 57.13973617553711}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 18%|█▊        | 88940/488281 [19:07<53:23, 124.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88968/488281 [19:08<51:09, 130.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 88996/488281 [19:08<50:03, 132.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.25624084472656, 'l2_loss': 67.19723510742188, 'l1_loss': 57.05900573730469}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
act


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.46it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 18%|█▊        | 89024/488281 [19:12<8:20:00, 13.31it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89052/488281 [19:13<4:29:59, 24.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89080/488281 [19:13<2:37:27, 42.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89093/488281 [19:13<2:17:18, 48.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.90873718261719, 'l2_loss': 66.83930206298828, 'l1_loss': 57.069435119628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89121/488281 [19:13<1:31:48, 72.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89149/488281 [19:13<1:09:51, 95.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89177/488281 [19:14<59:10, 112.42it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89205/488281 [19:14<53:59, 123.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.55511474609375, 'l2_loss': 66.87141418457031, 'l1_loss': 56.68370056152344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 89233/488281 [19:14<51:25, 129.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89261/488281 [19:14<50:10, 132.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89275/488281 [19:14<49:49, 133.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89289/488281 [19:15<1:00:03, 110.73it/s]

acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89303/488281 [19:15<56:45, 117.16it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.76702117919922, 'l2_loss': 66.12651062011719, 'l1_loss': 56.64051055908203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 89317/488281 [19:15<54:25, 122.19it/s]

acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89331/488281 [19:15<52:47, 125.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89345/488281 [19:15<51:37, 128.78it/s]

acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89359/488281 [19:15<50:49, 130.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89373/488281 [19:15<50:15, 132.26it/s]

acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89387/488281 [19:15<49:52, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89401/488281 [19:15<49:37, 133.96it/s]

{'loss': 124.09378051757812, 'l2_loss': 66.91958618164062, 'l1_loss': 57.1741943359375}
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89415/488281 [19:15<49:24, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89429/488281 [19:16<49:16, 134.92it/s]

acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89443/488281 [19:16<49:10, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89457/488281 [19:16<49:06, 135.33it/s]

acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89471/488281 [19:16<49:03, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89499/488281 [19:16<56:20, 117.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.73233795166016, 'l2_loss': 67.04719543457031, 'l1_loss': 56.685142517089844}
acts.shape

 18%|█▊        | 89527/488281 [19:16<52:34, 126.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89555/488281 [19:17<50:43, 131.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89583/488281 [19:17<50:04, 132.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89611/488281 [19:17<49:30, 134.21it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.28366088867188, 'l2_loss': 66.76019287109375, 'l1_loss': 56.52347183227539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 18%|█▊        | 89639/488281 [19:17<49:12, 135.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89653/488281 [19:17<49:08, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89681/488281 [19:18<56:33, 117.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89709/488281 [19:18<52:41, 126.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.26869201660156, 'l2_loss': 67.51129150390625, 'l1_loss': 56.75739669799805}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 89737/488281 [19:18<50:45, 130.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89765/488281 [19:18<49:48, 133.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89793/488281 [19:18<49:20, 134.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.25965881347656,

 18%|█▊        | 89821/488281 [19:19<49:08, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89849/488281 [19:19<49:01, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89877/488281 [19:19<56:15, 118.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 89905/488281 [19:19<52:31, 126.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.32612609863281, 'l2_loss': 67.14124298095703, 'l1_loss': 57.18488693237305}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 89933/488281 [19:19<50:40, 131.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89961/488281 [19:20<49:46, 133.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 89989/488281 [19:20<49:18, 134.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/50 [00:00<?, ?it/s][A
  4%|▍         | 2/50 [00:00<00:02, 18.82it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



  8%|▊         | 4/50 [00:00<00:03, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 12%|█▏        | 6/50 [00:00<00:03, 11.68it/s][A

acts.shape=torch.Size([65536, 4096])



 16%|█▌        | 8/50 [00:00<00:03, 11.20it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 20%|██        | 10/50 [00:00<00:03, 10.95it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 24%|██▍       | 12/50 [00:01<00:03, 10.81it/s][A

acts.shape=torch.Size([65536, 4096])



 28%|██▊       | 14/50 [00:01<00:03, 10.72it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 32%|███▏      | 16/50 [00:01<00:03, 10.66it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 36%|███▌      | 18/50 [00:01<00:03, 10.62it/s][A

acts.shape=torch.Size([65536, 4096])



 40%|████      | 20/50 [00:01<00:02, 10.59it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 44%|████▍     | 22/50 [00:02<00:02, 10.58it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 48%|████▊     | 24/50 [00:02<00:02, 10.56it/s][A

acts.shape=torch.Size([65536, 4096])



 52%|█████▏    | 26/50 [00:02<00:02, 10.55it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 56%|█████▌    | 28/50 [00:02<00:02, 10.55it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 60%|██████    | 30/50 [00:02<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])



 64%|██████▍   | 32/50 [00:02<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 68%|██████▊   | 34/50 [00:03<00:01, 10.46it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


 72%|███████▏  | 36/50 [00:03<00:01, 10.31it/s][A


acts.shape=torch.Size([65536, 4096])



 76%|███████▌  | 38/50 [00:03<00:01, 10.20it/s][A
 80%|████████  | 40/50 [00:03<00:00, 10.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 84%|████████▍ | 42/50 [00:03<00:00, 10.37it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 88%|████████▊ | 44/50 [00:04<00:00, 10.42it/s][A
 92%|█████████▏| 46/50 [00:04<00:00, 10.45it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 96%|█████████▌| 48/50 [00:04<00:00, 10.48it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 50/50 [00:04<00:00, 10.66it/s][A


acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
Resetting neurons! tensor(1, device='cuda:0')
torch.Size([4096, 512]) torch.Size([512, 4096]) torch.Size([4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.32099151611328, 'l2_loss': 67.32808685302734, 'l1_loss': 56.99290466308594}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.79451322555542 4.244482517242432 8.901965141296387
91.19%
Reconstruction: (0.9118994561064346, 3.79451322555542, 4.244482517242432, 8.901965141296387)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A
 18%|█▊        | 90003/488281 [19:29<22:53:24,  4.83it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 90017/488281 [19:29<16:16:04,  6.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 90045/488281 [19:30<8:23:14, 13.19it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 90072/488281 [19:30<4:42:17, 23.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 90100/488281 [19:30<2:42:24, 40.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.44557189941406, 'l2_loss': 66.85128021240234, 'l1_loss': 56.59429168701172}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 90128/488281 [19:30<1:44:22, 63.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 90156/488281 [19:31<1:16:05, 87.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 90184/488281 [19:31<1:02:14, 106.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 90212/488281 [19:31<55:30, 119.51it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.03250122070312, 'l2_loss': 68.03429412841797, 'l1_loss': 56.99820327758789}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 18%|█▊        | 90226/488281 [19:31<53:33, 123.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 90254/488281 [19:31<58:32, 113.32it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 18%|█▊        | 90282/488281 [19:32<53:39, 123.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 18%|█▊        | 90310/488281 [19:32<51:17, 129.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.25454711914062, 'l2_loss': 67.32243347167969, 'l1_loss': 56.9321174621582}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 19%|█▊        | 90338/488281 [19:32<50:05, 132.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90366/488281 [19:32<49:30, 133.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90394/488281 [19:32<49:13, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78907775878906, 'l2_loss': 67.69123840332031, 'l1_loss': 57.09783935546875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▊        | 90422/488281 [19:33<49:06, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 90450/488281 [19:33<56:21, 117.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 90478/488281 [19:33<52:34, 126.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90506/488281 [19:33<51:04, 129.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.15444946289062, 'l2_loss': 67.79820251464844, 'l1_loss': 57.35624313354492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▊        | 90534/488281 [19:33<49:58, 132.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90562/488281 [19:34<49:26, 134.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90590/488281 [19:34<49:12, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90618/488281 [19:34<49:06, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.41121673583984, 'l2_loss': 67.93936920166016, 'l1_loss': 57.47184753417969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▊        | 90646/488281 [19:34<56:26, 117.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 90674/488281 [19:35<52:35, 125.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90702/488281 [19:35<50:44, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.81318664550781, 'l2_loss': 66.85113525390625, 'l1_loss': 56.96205139160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▊        | 90730/488281 [19:35<49:48, 133.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90758/488281 [19:35<49:22, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90786/488281 [19:35<49:08, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90814/488281 [19:36<49:02, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35093688964844, 'l2_loss': 67.34922790527344, 'l1_loss': 57.001712799072266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 90828/488281 [19:36<59:20, 111.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 90856/488281 [19:36<53:59, 122.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90884/488281 [19:36<51:23, 128.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90912/488281 [19:36<50:09, 132.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.88710021972656, 'l2_loss': 66.95806121826172, 'l1_loss': 56.92903518676758}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▊        | 90940/488281 [19:37<49:30, 133.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90968/488281 [19:37<49:09, 134.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 90996/488281 [19:37<49:01, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.84564208984375, 'l2_loss': 66.48985290527344, 'l1_loss': 56.35578918457031}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9019579887390137 4.333783


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 11.82it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 10.66it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])





Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 91024/488281 [19:42<8:31:27, 12.95it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 91052/488281 [19:42<4:35:30, 24.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91080/488281 [19:42<2:39:55, 41.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91108/488281 [19:43<1:43:17, 64.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.43163299560547, 'l2_loss': 66.78688049316406, 'l1_loss': 56.644752502441406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▊        | 91136/488281 [19:43<1:15:31, 87.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91164/488281 [19:43<1:01:56, 106.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91192/488281 [19:43<55:16, 119.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 91220/488281 [19:43<59:17, 111.61it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.83335876464844, 'l2_loss': 67.23786926269531, 'l1_loss': 57.595489501953125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▊        | 91248/488281 [19:44<53:59, 122.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91276/488281 [19:44<51:22, 128.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91304/488281 [19:44<50:06, 132.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78846740722656, 'l2_loss': 67.77327728271484, 'l1_loss': 57.015193939208984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▊        | 91332/488281 [19:44<49:28, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91360/488281 [19:44<49:07, 134.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91388/488281 [19:45<49:00, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▊        | 91416/488281 [19:45<56:14, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.83375549316406, 'l2_loss': 66.77629089355469, 'l1_loss': 57.05746078491211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▊        | 91444/488281 [19:45<52:26, 126.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91472/488281 [19:45<50:35, 130.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▊        | 91500/488281 [19:46<49:42, 133.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.19564056396484,

 19%|█▊        | 91528/488281 [19:46<49:26, 133.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 91556/488281 [19:46<49:09, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91570/488281 [19:46<49:03, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 91598/488281 [19:46<56:23, 117.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.64217376708984, 'l2_loss': 66.78807067871094, 'l1_loss': 56.854103088378906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▉        | 91626/488281 [19:47<52:32, 125.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91654/488281 [19:47<50:38, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91682/488281 [19:47<49:40, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91710/488281 [19:47<49:15, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.42607116699219, 'l2_loss': 67.27532958984375, 'l1_loss': 57.15074157714844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 91738/488281 [19:47<49:01, 134.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91766/488281 [19:48<48:55, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 91794/488281 [19:48<56:08, 117.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.63876342773438, 'l2_loss': 66.6085205078125, 'l1_lo

 19%|█▉        | 91822/488281 [19:48<52:23, 126.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91850/488281 [19:48<50:32, 130.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91878/488281 [19:48<49:37, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91906/488281 [19:49<49:13, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.32880401611328, 'l2_loss': 66.92479705810547, 'l1_loss': 56.40400695800781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 91934/488281 [19:49<49:00, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 91962/488281 [19:49<48:52, 135.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 91990/488281 [19:49<56:05, 117.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 19%|█▉        | 92017/488281 [19:54<8:26:53, 13.03it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92045/488281 [19:54<4:31:43, 24.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92073/488281 [19:55<2:37:35, 41.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92101/488281 [19:55<1:42:01, 64.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.54044342041016, 'l2_loss': 67.87210845947266, 'l1_loss': 56.6683349609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 19%|█▉        | 92129/488281 [19:55<1:14:51, 88.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92157/488281 [19:55<1:01:33, 107.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92185/488281 [19:55<1:02:23, 105.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92213/488281 [19:56<55:29, 118.97it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.33322143554688, 'l2_loss': 67.38038635253906, 'l1_loss': 56.95283889770508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 92241/488281 [19:56<52:01, 126.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92269/488281 [19:56<50:20, 131.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92297/488281 [19:56<49:31, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92325/488281 [19:56<49:08, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.54400634765625, 'l2_loss': 66.54115295410156, 'l1_loss': 57.00285339355469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 92339/488281 [19:57<49:00, 134.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92367/488281 [19:57<56:08, 117.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92395/488281 [19:57<52:20, 126.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.06187438964844, 'l2_loss': 67.32611083984375, 'l1_loss': 56.73576736450195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 92423/488281 [19:57<50:30, 130.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92451/488281 [19:57<49:36, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92479/488281 [19:58<49:08, 134.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92507/488281 [19:58<48:57, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.74479675292969, 'l2_loss': 67.52748107910156, 'l1_loss': 57.217315673828125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▉        | 92535/488281 [19:58<48:51, 135.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92563/488281 [19:58<56:11, 117.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92591/488281 [19:59<52:23, 125.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92619/488281 [19:59<50:32, 130.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1904296875, 'l2_loss': 66.9256591796875, 'l1_loss': 57.264766693115234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 19%|█▉        | 92647/488281 [19:59<49:34, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92675/488281 [19:59<49:07, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92703/488281 [19:59<48:56, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.02326965332031, 'l2_loss': 67.8542709350586, 'l1_loss': 57.169002532958984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 92731/488281 [20:00<48:48, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92759/488281 [20:00<56:06, 117.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92787/488281 [20:00<52:19, 125.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92815/488281 [20:00<50:28, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.63752746582031, 'l2_loss': 66.96458435058594, 'l1_loss': 56.672943115234375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▉        | 92843/488281 [20:01<49:33, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92871/488281 [20:01<49:05, 134.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92899/488281 [20:01<48:50, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50947570800781,

 19%|█▉        | 92927/488281 [20:01<48:46, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92941/488281 [20:01<59:07, 111.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 92969/488281 [20:02<53:47, 122.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 92997/488281 [20:02<51:10, 128.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.4195327758789, 'l2_loss': 67.11390686035156, 'l1_loss': 56.305625915527344}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9272377490997314 4.357616


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 11.26it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.48it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.45it/s]
 19%|█▉        | 93011/488281 [20:06<11:38:44,  9.43it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93039/488281 [20:07<6:07:08, 17.94it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93067/488281 [20:07<3:24:52, 32.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93095/488281 [20:07<2:05:11, 52.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93109/488281 [20:07<1:42:15, 64.41it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.33839416503906, 'l2_loss': 66.8783187866211, 'l1_loss': 56.460079193115234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93137/488281 [20:07<1:29:03, 73.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93165/488281 [20:08<1:08:25, 96.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93193/488281 [20:08<58:19, 112.89it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.69131469726562,

 19%|█▉        | 93221/488281 [20:08<53:23, 123.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93249/488281 [20:08<50:55, 129.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93277/488281 [20:09<49:44, 132.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93305/488281 [20:09<49:11, 133.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.53995513916016, 'l2_loss': 67.22810363769531, 'l1_loss': 57.311851501464844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93333/488281 [20:09<56:41, 116.09it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93361/488281 [20:09<52:35, 125.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93389/488281 [20:09<50:31, 130.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93417/488281 [20:10<49:32, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.05363464355469, 'l2_loss': 67.46451568603516, 'l1_loss': 57.58911895751953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 93445/488281 [20:10<49:03, 134.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93473/488281 [20:10<48:48, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93501/488281 [20:10<48:41, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.42312622070312, 'l2_loss': 67.16505432128906, 'l1_loss': 57.2580680847168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93529/488281 [20:11<57:14, 114.94it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93557/488281 [20:11<52:48, 124.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93571/488281 [20:11<51:42, 127.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93599/488281 [20:11<50:08, 131.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.60488891601562, 'l2_loss': 66.91311645507812, 'l1_loss': 56.6917724609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 19%|█▉        | 93627/488281 [20:11<49:21, 133.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93655/488281 [20:11<48:57, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93683/488281 [20:12<48:45, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93711/488281 [20:12<56:35, 116.20it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.68328857421875, 'l2_loss': 66.67762756347656, 'l1_loss': 57.00566482543945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 93739/488281 [20:12<52:29, 125.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93767/488281 [20:12<50:29, 130.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93795/488281 [20:13<49:29, 132.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.56888580322266, 'l2_loss': 67.01467895507812, 'l1_loss': 56.55420684814453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 93823/488281 [20:13<49:01, 134.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93851/488281 [20:13<48:47, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93879/488281 [20:13<48:40, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 93907/488281 [20:14<56:27, 116.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.565673828125, 'l2_loss': 67.34847259521484, 'l1_loss': 57.217201232910156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 19%|█▉        | 93935/488281 [20:14<52:24, 125.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93963/488281 [20:14<50:24, 130.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 93991/488281 [20:14<49:27, 132.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.41it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.42it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 19%|█▉        | 94019/488281 [20:19<8:16:50, 13.23it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94047/488281 [20:19<4:28:06, 24.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94075/488281 [20:19<2:36:23, 42.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94102/488281 [20:20<1:55:27, 56.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.67210388183594, 'l2_loss': 66.82746887207031, 'l1_loss': 56.84463119506836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 94130/488281 [20:20<1:20:58, 81.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94158/488281 [20:20<1:04:17, 102.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94186/488281 [20:20<56:09, 116.96it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94214/488281 [20:20<52:11, 125.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.40106201171875, 'l2_loss': 66.94127655029297, 'l1_loss': 56.45978546142578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 94242/488281 [20:21<50:15, 130.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94270/488281 [20:21<49:17, 133.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94284/488281 [20:21<1:01:26, 106.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94312/488281 [20:21<54:47, 119.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20210266113281, 'l2_loss': 67.0285873413086, 'l1_loss': 57.17351150512695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 19%|█▉        | 94340/488281 [20:21<51:30, 127.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94368/488281 [20:22<49:53, 131.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94396/488281 [20:22<49:06, 133.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.32218933105469, 'l2_loss': 67.21308135986328, 'l1_loss': 57.109107971191406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▉        | 94424/488281 [20:22<48:43, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94452/488281 [20:22<48:32, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94480/488281 [20:23<56:03, 117.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94508/488281 [20:23<52:07, 125.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12744140625, 'l2_loss': 66.90972900390625, 'l1_loss': 57.21771240234375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 19%|█▉        | 94536/488281 [20:23<50:11, 130.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94564/488281 [20:23<49:14, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94592/488281 [20:23<48:59, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.88268280029297,

 19%|█▉        | 94620/488281 [20:24<48:40, 134.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94648/488281 [20:24<48:30, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94676/488281 [20:24<55:45, 117.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94704/488281 [20:24<51:58, 126.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.21942138671875, 'l2_loss': 67.19058990478516, 'l1_loss': 57.028831481933594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 19%|█▉        | 94732/488281 [20:25<50:06, 130.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94760/488281 [20:25<49:10, 133.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94788/488281 [20:25<48:43, 134.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94816/488281 [20:25<48:31, 135.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.08103942871094, 'l2_loss': 67.1106185913086, 'l1_loss': 56.97042465209961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 19%|█▉        | 94844/488281 [20:25<48:24, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94872/488281 [20:26<55:35, 117.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 94900/488281 [20:26<51:51, 126.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1845474243164, 

 19%|█▉        | 94928/488281 [20:26<50:02, 131.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94956/488281 [20:26<49:08, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94984/488281 [20:26<48:41, 134.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 94998/488281 [20:27<48:33, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34856414794922, 'l2_loss': 67.39151000976562, 'l1_loss': 56.957054138183594}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.848822832107544 4.285078525543213 8.956578254699707
91.46%
Reconstruction: (0.9145895491577255, 3.848822832107544, 4.285078525543213, 8.956578254699707)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.24it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 19%|█▉        | 95026/488281 [20:31<8:11:06, 13.35it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 95040/488281 [20:31<6:08:21, 17.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 19%|█▉        | 95068/488281 [20:32<3:25:07, 31.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 95096/488281 [20:32<2:05:20, 52.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.46444702148438, 'l2_loss': 66.8426513671875, 'l1_loss': 56.621795654296875}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 19%|█▉        | 95124/488281 [20:32<1:26:03, 76.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 95152/488281 [20:32<1:06:47, 98.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 95180/488281 [20:32<57:20, 114.25it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 19%|█▉        | 95208/488281 [20:33<52:45, 124.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.62289428710938, 'l2_loss': 67.11854553222656, 'l1_loss': 56.50435256958008}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 95222/488281 [20:33<51:25, 127.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95250/488281 [20:33<57:11, 114.53it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95278/488281 [20:33<52:38, 124.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95306/488281 [20:33<50:26, 129.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.96943664550781, 'l2_loss': 67.54240417480469, 'l1_loss': 57.42702865600586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 95334/488281 [20:34<49:19, 132.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95362/488281 [20:34<48:47, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95390/488281 [20:34<48:31, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.82687377929688, 'l2_loss': 68.17198944091797, 'l1_l

 20%|█▉        | 95418/488281 [20:34<48:24, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95446/488281 [20:35<55:34, 117.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95474/488281 [20:35<51:50, 126.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95502/488281 [20:35<50:01, 130.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.23340606689453, 'l2_loss': 66.90704345703125, 'l1_loss': 56.32636260986328}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 95530/488281 [20:35<49:07, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95558/488281 [20:35<48:40, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95586/488281 [20:36<48:27, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95614/488281 [20:36<48:35, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23137664794922, 'l2_loss': 67.14244842529297, 'l1_loss': 57.08892822265625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95628/488281 [20:36<59:00, 110.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95656/488281 [20:36<53:30, 122.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95684/488281 [20:36<50:48, 128.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95712/488281 [20:37<49:31, 132.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23858642578125, 'l2_loss': 67.49179077148438, 'l1_loss': 56.74679946899414}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 95740/488281 [20:37<48:51, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95768/488281 [20:37<48:31, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95796/488281 [20:37<48:23, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.45355224609375, 'l2_loss': 66.97393035888672, 'l1_loss': 56.4796257019043}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95824/488281 [20:38<55:31, 117.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 95852/488281 [20:38<51:47, 126.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95880/488281 [20:38<49:56, 130.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95908/488281 [20:38<49:04, 133.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.59564208984375, 'l2_loss': 66.61100769042969, 'l1_loss': 55.98463821411133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 95936/488281 [20:38<48:37, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95964/488281 [20:39<48:24, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 95992/488281 [20:39<48:40, 134.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 64
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 20%|█▉        | 96020/488281 [20:44<8:28:47, 12.85it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96048/488281 [20:44<4:33:51, 23.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96076/488281 [20:44<2:38:45, 41.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96104/488281 [20:44<1:42:23, 63.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.61019134521484, 'l2_loss': 66.9486083984375, 'l1_loss': 56.661582946777344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 96132/488281 [20:44<1:14:59, 87.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96160/488281 [20:45<1:01:19, 106.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96188/488281 [20:45<54:37, 119.63it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96216/488281 [20:45<58:36, 111.49it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.51246643066406, 'l2_loss': 67.28561401367188, 'l1_loss': 57.22685241699219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 96244/488281 [20:45<53:16, 122.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96272/488281 [20:46<50:39, 128.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96300/488281 [20:46<49:22, 132.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.78559875488281,

 20%|█▉        | 96328/488281 [20:46<48:45, 133.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96356/488281 [20:46<48:27, 134.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96370/488281 [20:46<48:22, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96398/488281 [20:47<55:27, 117.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.84180450439453, 'l2_loss': 67.5390625, 'l1_loss': 57.30274200439453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.S

 20%|█▉        | 96426/488281 [20:47<51:44, 126.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96454/488281 [20:47<49:53, 130.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96482/488281 [20:47<48:59, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96510/488281 [20:47<48:34, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20063781738281, 'l2_loss': 67.04513549804688, 'l1_loss': 57.1555061340332}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 20%|█▉        | 96538/488281 [20:48<48:20, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96566/488281 [20:48<48:13, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96594/488281 [20:48<55:21, 117.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.04351806640625, 'l2_loss': 66.61807250976562, 'l1_l

 20%|█▉        | 96622/488281 [20:48<51:40, 126.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96650/488281 [20:49<50:04, 130.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96678/488281 [20:49<49:04, 133.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96706/488281 [20:49<48:35, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05015563964844, 'l2_loss': 67.35291290283203, 'l1_loss': 56.697242736816406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 20%|█▉        | 96734/488281 [20:49<48:20, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96762/488281 [20:49<48:14, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96790/488281 [20:50<55:32, 117.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96818/488281 [20:50<51:45, 126.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.69805908203125, 'l2_loss': 66.94950866699219, 'l1_loss': 56.7485466003418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 20%|█▉        | 96846/488281 [20:50<49:52, 130.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96874/488281 [20:50<48:57, 133.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96902/488281 [20:50<48:31, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.3631591796875, 'l2_loss': 67.4659423828125, 'l1_loss': 56.897216796875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 20%|█▉        | 96930/488281 [20:51<48:18, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 96958/488281 [20:51<48:11, 135.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 96972/488281 [20:51<58:23, 111.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 97000/488281 [20:51<53:08, 122.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.5380630493164, 'l2_loss': 67.33786010742188, 'l1_loss': 56.20020294189453}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8528027534484863 4.287621974945068 8.891152381896973
91.37%
Reconstruction: (0.9136980849757977, 3.8528027534484863, 4.287621974945068, 8.891152381896973)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.58it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.74it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 20%|█▉        | 97027/488281 [20:56<8:18:10, 13.09it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97055/488281 [20:56<4:26:54, 24.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97083/488281 [20:56<2:34:53, 42.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97111/488281 [20:57<1:40:20, 64.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.5561294555664, 'l2_loss': 67.01702880859375, 'l1_loss': 56.539100646972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 97139/488281 [20:57<1:13:57, 88.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 97153/488281 [20:57<1:16:32, 85.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 97181/488281 [20:57<1:02:01, 105.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97209/488281 [20:57<54:55, 118.68it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.48551177978516, 'l2_loss': 66.81423950195312, 'l1_loss': 56.67127227783203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 97237/488281 [20:58<51:24, 126.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97265/488281 [20:58<49:40, 131.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97293/488281 [20:58<48:50, 133.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97321/488281 [20:58<48:27, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.0068359375, 'l2_loss': 66.81244659423828, 'l1_loss': 57.19438934326172}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 20%|█▉        | 97335/488281 [20:58<48:19, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 97363/488281 [20:59<55:21, 117.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97391/488281 [20:59<51:36, 126.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.31278991699219,

 20%|█▉        | 97419/488281 [20:59<49:47, 130.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97447/488281 [20:59<48:53, 133.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97475/488281 [20:59<48:26, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97503/488281 [21:00<48:15, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.58650207519531, 'l2_loss': 67.02297973632812, 'l1_loss': 56.56351852416992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|█▉        | 97531/488281 [21:00<48:07, 135.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 97559/488281 [21:00<55:17, 117.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|█▉        | 97587/488281 [21:00<51:33, 126.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|█▉        | 97615/488281 [21:01<49:44, 130.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13458251953125, 'l2_loss': 67.3059310913086, 'l1_loss': 56.82865524291992}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 20%|█▉        | 97643/488281 [21:01<48:51, 133.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97671/488281 [21:01<48:34, 134.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97699/488281 [21:01<48:16, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.78926086425781,

 20%|██        | 97727/488281 [21:01<48:09, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 97741/488281 [21:02<58:47, 110.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 97769/488281 [21:02<53:16, 122.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97797/488281 [21:02<50:34, 128.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.03060913085938, 'l2_loss': 67.40046691894531, 'l1_loss': 56.6301383972168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 20%|██        | 97825/488281 [21:02<49:15, 132.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97853/488281 [21:02<48:35, 133.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97881/488281 [21:03<48:15, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97909/488281 [21:03<48:07, 135.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.17633056640625, 'l2_loss': 67.34149169921875, 'l1_loss': 56.834835052490234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 97937/488281 [21:03<55:14, 117.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 97965/488281 [21:03<51:30, 126.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 97993/488281 [21:03<49:40, 130.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 65
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.64it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 20%|██        | 98021/488281 [21:08<8:10:46, 13.25it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98049/488281 [21:08<4:24:54, 24.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98077/488281 [21:09<2:34:14, 42.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98105/488281 [21:09<1:40:02, 65.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.55619812011719, 'l2_loss': 67.06246948242188, 'l1_loss': 56.49372863769531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98133/488281 [21:09<1:20:34, 80.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98161/488281 [21:09<1:04:10, 101.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98189/488281 [21:10<55:52, 116.37it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98217/488281 [21:10<51:50, 125.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.96405029296875, 'l2_loss': 67.05973815917969, 'l1_loss': 56.90431213378906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 98245/488281 [21:10<49:48, 130.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98273/488281 [21:10<48:49, 133.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98301/488281 [21:10<48:22, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.50700378417969, 'l2_loss': 67.36579895019531, 'l1_loss': 56.141204833984375}
acts.shape=torch.Size([4096, 4096])
acts.shape

 20%|██        | 98329/488281 [21:11<55:33, 116.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98357/488281 [21:11<51:37, 125.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98385/488281 [21:11<49:42, 130.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98413/488281 [21:11<48:47, 133.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35680389404297, 'l2_loss': 67.62266540527344, 'l1_loss': 56.73413848876953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 98441/488281 [21:11<48:18, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98469/488281 [21:12<48:05, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98483/488281 [21:12<48:01, 135.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98511/488281 [21:12<59:41, 108.84it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.47956085205078, 'l2_loss': 67.04315185546875, 'l1_loss': 56.43640899658203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 98539/488281 [21:12<53:39, 121.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98567/488281 [21:12<50:41, 128.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98595/488281 [21:13<49:13, 131.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.77490234375, 'l2_loss': 67.15772247314453, 'l1_loss': 56.6171760559082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 20%|██        | 98623/488281 [21:13<48:33, 133.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98651/488281 [21:13<48:12, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98679/488281 [21:13<48:12, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98707/488281 [21:14<56:19, 115.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.17143249511719, 'l2_loss': 67.11767578125, 'l1_loss': 57.05376052856445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 20%|██        | 98735/488281 [21:14<51:59, 124.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98763/488281 [21:14<49:52, 130.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98791/488281 [21:14<48:49, 132.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98819/488281 [21:14<48:20, 134.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.16053009033203, 'l2_loss': 67.37293243408203, 'l1_loss': 56.78759765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 20%|██        | 98847/488281 [21:15<48:33, 133.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98875/488281 [21:15<48:11, 134.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 98903/488281 [21:15<55:49, 116.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.79700469970703, 'l2_loss': 66.99840545654297, 'l1_loss': 56.79859924316406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 98931/488281 [21:15<51:44, 125.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98959/488281 [21:16<49:43, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 98987/488281 [21:16<48:44, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 12.22it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.80it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.76it/s]
 20%|██        | 99015/488281 [21:21<8:14:49, 13.11it/s] 

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99043/488281 [21:21<4:26:49, 24.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99071/488281 [21:21<2:35:06, 41.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99084/488281 [21:21<2:19:55, 46.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99112/488281 [21:21<1:32:28, 70.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11254119873047, 'l2_loss': 67.37594604492188, 'l1_loss': 56.736595153808594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 20%|██        | 99140/488281 [21:22<1:09:33, 93.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99168/488281 [21:22<58:24, 111.04it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99196/488281 [21:22<53:06, 122.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47221374511719, 'l2_loss': 67.3850326538086, 'l1_loss': 57.087181091308594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 99224/488281 [21:22<50:23, 128.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99252/488281 [21:22<49:02, 132.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99280/488281 [21:23<56:24, 114.93it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99308/488281 [21:23<52:00, 124.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.91035461425781, 'l2_loss': 66.7341079711914, 'l1_loss': 56.17625045776367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 20%|██        | 99336/488281 [21:23<49:49, 130.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99364/488281 [21:23<48:45, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99392/488281 [21:24<48:13, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.61710357666016,

 20%|██        | 99420/488281 [21:24<48:00, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99448/488281 [21:24<47:51, 135.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99476/488281 [21:24<55:43, 116.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99504/488281 [21:24<51:40, 125.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.82382202148438, 'l2_loss': 67.20416259765625, 'l1_loss': 56.619659423828125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 20%|██        | 99532/488281 [21:25<49:38, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99560/488281 [21:25<48:39, 133.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99588/488281 [21:25<48:10, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99616/488281 [21:25<47:57, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.98037719726562, 'l2_loss': 67.30253601074219, 'l1_loss': 56.67784118652344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 99644/488281 [21:25<47:50, 135.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99672/488281 [21:26<55:44, 116.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99700/488281 [21:26<51:48, 124.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50851440429688,

 20%|██        | 99728/488281 [21:26<49:43, 130.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99756/488281 [21:26<48:41, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99784/488281 [21:27<48:10, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99812/488281 [21:27<47:57, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97772216796875, 'l2_loss': 67.51081848144531, 'l1_loss': 56.46690368652344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 20%|██        | 99826/488281 [21:27<47:52, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99854/488281 [21:27<55:52, 115.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 99882/488281 [21:27<51:41, 125.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99910/488281 [21:28<49:39, 130.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.6734390258789, 'l2_loss': 67.10166931152344, 'l1_loss': 56.57176971435547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 20%|██        | 99938/488281 [21:28<48:38, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99966/488281 [21:28<48:08, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 99994/488281 [21:28<47:53, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 66
acts.shape=torch.Size([4096, 4096])
{'loss': 124.33511352539062, 'l2_loss': 67.40902709960938, 'l1_loss': 56.926090240478516}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size(


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 11.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 20%|██        | 100022/488281 [21:33<8:11:59, 13.15it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 20%|██        | 100033/488281 [21:33<6:30:56, 16.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 100047/488281 [21:33<4:43:06, 22.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 100061/488281 [21:33<3:30:10, 30.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 100075/488281 [21:33<2:40:18, 40.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 20%|██        | 100089/488281 [21:34<2:05:58, 51.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.6209716796875, 'l2_loss': 67.34233856201172, 'l1_loss': 57.27863311767578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100103/488281 [21:34<1:42:14, 63.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100117/488281 [21:34<1:25:43, 75.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100131/488281 [21:34<1:14:14, 87.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100145/488281 [21:34<1:06:14, 97.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100159/488281 [21:34<1:00:38, 106.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100173/488281 [21:34<56:43, 114.02it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100187/488281 [21:34<53:59, 119.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100201/488281 [21:34<52:23, 123.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.86558532714844, 'l2_loss': 67.83111572265625, 'l1_loss': 57.03446960449219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100215/488281 [21:34<50:58, 126.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100229/488281 [21:35<1:00:28, 106.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100243/488281 [21:35<56:37, 114.20it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100257/488281 [21:35<53:55, 119.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100271/488281 [21:35<52:02, 124.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100285/488281 [21:35<50:42, 127.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100299/488281 [21:35<49:46, 129.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.90839385986328, 'l2_loss': 67.11580657958984, 'l1_loss': 56.79258728027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100313/488281 [21:35<49:09, 131.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100327/488281 [21:35<48:42, 132.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100341/488281 [21:35<48:22, 133.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100355/488281 [21:36<48:08, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100369/488281 [21:36<47:58, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100383/488281 [21:36<47:52, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100397/488281 [21:36<47:48, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100411/488281 [21:36<47:46, 135.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.87638092041016, 'l2_loss': 67.83840942382812, 'l1_loss': 57.03797149658203}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100439/488281 [21:36<54:51, 117.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100467/488281 [21:36<51:08, 126.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100495/488281 [21:37<49:20, 130.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100523/488281 [21:37<48:28, 133.33it/s]

{'loss': 124.57350158691406, 'l2_loss': 67.46707916259766, 'l1_loss': 57.10642623901367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 100551/488281 [21:37<48:01, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100579/488281 [21:37<47:48, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100607/488281 [21:38<47:43, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.39595031738281, 'l2_loss': 67.112548828125, 'l1_loss': 57.28340148925781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100621/488281 [21:38<57:54, 111.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100649/488281 [21:38<52:39, 122.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100677/488281 [21:38<50:03, 129.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100705/488281 [21:38<48:48, 132.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.00962829589844, 'l2_loss': 66.61538696289062, 'l1_loss': 56.39424514770508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 100733/488281 [21:39<48:20, 133.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100761/488281 [21:39<47:56, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100789/488281 [21:39<47:44, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100817/488281 [21:39<55:16, 116.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.29630279541016, 'l2_loss': 66.75416564941406, 'l1_loss': 56.542137145996094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 21%|██        | 100845/488281 [21:39<51:20, 125.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100873/488281 [21:40<49:23, 130.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100901/488281 [21:40<48:28, 133.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.074951171875, 'l2_loss': 66.99809265136719, 'l1_loss': 56.07686233520508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 21%|██        | 100929/488281 [21:40<47:59, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100957/488281 [21:40<47:46, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 100985/488281 [21:40<47:39, 135.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 100999/488281 [21:41<57:53, 111.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.02767944335938, 'l2_loss': 67.62088775634766, 'l1_loss': 57.40678787231445}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8933048248291016 4.326300621032715 8.936633110046387
91.41%
Reconstruction: (0.9141448321988506, 3.8933048248291016, 4.326300621032715, 8.936633110046387)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 21%|██        | 101025/488281 [21:45<8:22:42, 12.84it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101053/488281 [21:46<4:27:06, 24.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101081/488281 [21:46<2:34:19, 41.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101109/488281 [21:46<1:39:42, 64.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.49090576171875, 'l2_loss': 67.13392639160156, 'l1_loss': 57.35697937011719}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 101137/488281 [21:46<1:13:04, 88.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101165/488281 [21:46<1:00:02, 107.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101179/488281 [21:46<56:17, 114.61it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101207/488281 [21:47<58:54, 109.50it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.3830795288086, 'l2_loss': 67.05853271484375, 'l1_loss': 56.324546813964844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 101235/488281 [21:47<53:16, 121.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101263/488281 [21:47<50:19, 128.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101291/488281 [21:47<48:53, 131.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101319/488281 [21:48<48:12, 133.80it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.14363098144531, 'l2_loss': 67.261474609375, 'l1_loss': 56.88215255737305}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 21%|██        | 101347/488281 [21:48<47:51, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101375/488281 [21:48<47:41, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101389/488281 [21:48<58:01, 111.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.90608215332031, 'l2_loss': 67.17082214355469, 'l1_l

 21%|██        | 101417/488281 [21:48<52:40, 122.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101445/488281 [21:49<50:02, 128.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101473/488281 [21:49<48:44, 132.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101501/488281 [21:49<48:07, 133.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07284545898438, 'l2_loss': 67.26834106445312, 'l1_loss': 56.804500579833984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 21%|██        | 101529/488281 [21:49<47:49, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101557/488281 [21:49<47:39, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101585/488281 [21:50<54:46, 117.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101613/488281 [21:50<51:05, 126.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97097778320312, 'l2_loss': 67.28802490234375, 'l1_loss': 56.68294906616211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 101641/488281 [21:50<49:14, 130.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101669/488281 [21:50<48:19, 133.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101697/488281 [21:51<47:53, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.74742126464844, 'l2_loss': 67.11253356933594, 'l1_loss': 56.634891510009766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 21%|██        | 101725/488281 [21:51<47:41, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101753/488281 [21:51<47:45, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101781/488281 [21:51<54:51, 117.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101809/488281 [21:51<51:07, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.18019104003906, 'l2_loss': 66.95792388916016, 'l1_loss': 56.22226333618164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 101837/488281 [21:52<49:14, 130.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101865/488281 [21:52<48:20, 133.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101893/488281 [21:52<47:52, 134.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 101921/488281 [21:52<47:41, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.70993041992188, 'l2_loss': 67.10955047607422, 'l1_loss': 56.60037612915039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 101949/488281 [21:52<47:34, 135.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101977/488281 [21:53<54:38, 117.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 101991/488281 [21:53<52:28, 122.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 67
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20646667480469, 'l2_loss': 67.09668731689453, 'l1_loss': 57.109779357910156}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size(


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.32it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 21%|██        | 102018/488281 [21:58<8:14:36, 13.02it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102046/488281 [21:58<4:24:50, 24.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102074/488281 [21:58<2:33:33, 41.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102102/488281 [21:58<1:39:22, 64.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.51318359375, 'l2_loss': 67.64140319824219, 'l1_loss': 56.87177658081055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 21%|██        | 102130/488281 [21:58<1:12:52, 88.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102158/488281 [21:59<1:06:54, 96.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102186/488281 [21:59<56:58, 112.94it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102214/488281 [21:59<52:07, 123.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.40444946289062, 'l2_loss': 67.34652709960938, 'l1_loss': 57.05792236328125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 102228/488281 [21:59<50:43, 126.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102242/488281 [21:59<50:01, 128.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102256/488281 [21:59<49:15, 130.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102270/488281 [21:59<48:42, 132.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102284/488281 [22:00<48:19, 133.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102298/488281 [22:00<48:02, 133.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.01527404785156, 'l2_loss': 67.00180053710938, 'l1_loss': 57.01347351074219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102312/488281 [22:00<47:52, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102326/488281 [22:00<47:44, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102340/488281 [22:00<58:03, 110.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102354/488281 [22:00<54:52, 117.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102368/488281 [22:00<52:37, 122.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102382/488281 [22:00<51:03, 125.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102396/488281 [22:01<49:57, 128.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102410/488281 [22:01<49:12, 130.67it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.1885986328125, 'l2_loss': 66.6990966796875, 'l1_loss': 56.489501953125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102424/488281 [22:01<48:40, 132.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102438/488281 [22:01<48:17, 133.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102452/488281 [22:01<48:00, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102466/488281 [22:01<47:49, 134.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102480/488281 [22:01<47:41, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102494/488281 [22:01<47:36, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.44277954101562, 'l2_loss': 67.3243408203125, 'l1_loss': 57.118438720703125}
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102508/488281 [22:01<47:34, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102522/488281 [22:01<47:31, 135.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102550/488281 [22:02<54:34, 117.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102578/488281 [22:02<50:53, 126.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102606/488281 [22:02<49:07, 130.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.72480773925781, 'l2_loss': 67.11254119873047, 'l1_loss': 56.61226272583008}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 102634/488281 [22:02<48:12, 133.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102662/488281 [22:03<47:47, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102690/488281 [22:03<47:34, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102718/488281 [22:03<47:29, 135.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.80591583251953, 'l2_loss': 67.43698120117188, 'l1_loss': 56.368934631347656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102732/488281 [22:03<57:38, 111.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102760/488281 [22:03<52:40, 121.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102788/488281 [22:04<49:57, 128.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102816/488281 [22:04<48:38, 132.07it/s]

{'loss': 124.80870056152344, 'l2_loss': 67.49522399902344, 'l1_loss': 57.313472747802734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 21%|██        | 102844/488281 [22:04<47:58, 133.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102872/488281 [22:04<47:38, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102900/488281 [22:04<47:29, 135.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34063720703125, 'l2_loss': 67.12875366210938, 'l1_loss': 57.21188735961914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 102928/488281 [22:05<58:12, 110.35it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 102956/488281 [22:05<52:38, 121.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102984/488281 [22:05<49:56, 128.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 102998/488281 [22:05<49:08, 130.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.41508483886719, 'l2_loss': 66.96568298339844, 'l1_loss': 56.449398040771484}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.930854082107544 4.365675449371338 9.007549285888672
91.43%
Reconstruction: (0.914349522709195, 3.930854082107544, 4.365675449371338, 9.007549285888672)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.14it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.84it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 21%|██        | 103026/488281 [22:10<8:02:08, 13.32it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103054/488281 [22:10<4:20:22, 24.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103082/488281 [22:10<2:31:42, 42.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103096/488281 [22:10<2:00:23, 53.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.42718505859375, 'l2_loss': 67.73370361328125, 'l1_loss': 56.693477630615234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 103124/488281 [22:11<1:30:08, 71.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103152/488281 [22:11<1:08:18, 93.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103180/488281 [22:11<57:35, 111.45it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103208/488281 [22:11<52:21, 122.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.82032012939453, 'l2_loss': 67.94304656982422, 'l1_loss': 56.87727355957031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 103236/488281 [22:12<49:46, 128.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103264/488281 [22:12<48:47, 131.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 103292/488281 [22:12<48:02, 133.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 103320/488281 [22:12<54:56, 116.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9043960571289, 'l2_loss': 67.26630401611328, 'l1_loss': 56.638092041015625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 103348/488281 [22:12<51:01, 125.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103376/488281 [22:13<49:06, 130.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103404/488281 [22:13<48:12, 133.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27130126953125, 'l2_loss': 67.30960083007812, 'l1_loss': 56.96170425415039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 103432/488281 [22:13<47:43, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103460/488281 [22:13<47:29, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103474/488281 [22:13<47:25, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 103502/488281 [22:14<54:29, 117.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56150817871094, 'l2_loss': 67.34017944335938, 'l1_loss': 57.22132873535156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 103530/488281 [22:14<50:48, 126.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103558/488281 [22:14<48:59, 130.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103586/488281 [22:14<48:05, 133.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103614/488281 [22:14<47:40, 134.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13961791992188, 'l2_loss': 67.22198486328125, 'l1_loss': 56.91762924194336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 103642/488281 [22:15<47:27, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103670/488281 [22:15<47:21, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██        | 103698/488281 [22:15<54:20, 117.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.87847900390625, 'l2_loss': 67.52598571777344, 'l1_loss': 57.35249328613281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██        | 103726/488281 [22:15<50:44, 126.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██        | 103754/488281 [22:16<48:56, 130.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 103782/488281 [22:16<48:14, 132.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 103810/488281 [22:16<47:44, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20039367675781, 'l2_loss': 67.30427551269531, 'l1_loss': 56.8961181640625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 21%|██▏       | 103838/488281 [22:16<47:28, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 103866/488281 [22:16<47:21, 135.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 103894/488281 [22:17<58:42, 109.13it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 103922/488281 [22:17<52:52, 121.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.94720458984375, 'l2_loss': 67.24803161621094, 'l1_loss': 56.69917297363281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██▏       | 103950/488281 [22:17<49:59, 128.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 103978/488281 [22:17<48:34, 131.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 103992/488281 [22:17<48:09, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 68
acts.shape=torch.Size([4096, 4096])
{'loss': 123.771240234375, 'l2_loss': 67.11177062988281, 'l1_loss': 56.65946578979492}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8602211


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.48it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.71it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 21%|██▏       | 104020/488281 [22:22<8:02:48, 13.26it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104048/488281 [22:22<4:20:39, 24.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104062/488281 [22:22<3:16:37, 32.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104075/488281 [22:23<2:44:13, 38.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104089/488281 [22:23<2:08:35, 49.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.02576446533203, 'l2_loss': 67.2254409790039, 'l1_loss': 56.800323486328125}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██▏       | 104103/488281 [22:23<1:43:56, 61.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104117/488281 [22:23<1:26:47, 73.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104131/488281 [22:23<1:14:51, 85.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104145/488281 [22:23<1:06:31, 96.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104159/488281 [22:23<1:00:42, 105.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104173/488281 [22:23<56:38, 113.03it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104187/488281 [22:23<53:47, 119.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104201/488281 [22:24<51:50, 123.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.47342681884766, 'l2_loss': 68.15509033203125, 'l1_loss': 57.318336486816406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 21%|██▏       | 104215/488281 [22:24<50:27, 126.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104229/488281 [22:24<49:28, 129.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104243/488281 [22:24<48:47, 131.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104257/488281 [22:24<58:26, 109.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104285/488281 [22:24<52:58, 120.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104313/488281 [22:24<50:02, 127.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.81640625, 'l2_loss': 67.17337036132812, 'l1_loss': 56.64303970336914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.

 21%|██▏       | 104341/488281 [22:25<48:34, 131.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104369/488281 [22:25<47:51, 133.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104397/488281 [22:25<47:29, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.29566955566406,

 21%|██▏       | 104425/488281 [22:25<47:20, 135.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104439/488281 [22:25<47:17, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104467/488281 [22:26<54:33, 117.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104495/488281 [22:26<50:46, 125.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4151611328125, 'l2_loss': 67.62417602539062, 'l1_loss': 56.79098129272461}
acts.shape=t

 21%|██▏       | 104523/488281 [22:26<48:57, 130.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104551/488281 [22:26<48:01, 133.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104579/488281 [22:27<47:34, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104607/488281 [22:27<47:23, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47283935546875, 'l2_loss': 67.79249572753906, 'l1_loss': 56.68034362792969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██▏       | 104635/488281 [22:27<47:16, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104663/488281 [22:27<54:18, 117.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104691/488281 [22:27<50:39, 126.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104719/488281 [22:28<48:52, 130.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.98683166503906, 'l2_loss': 67.22689819335938, 'l1_loss': 56.75993728637695}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 21%|██▏       | 104747/488281 [22:28<47:58, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104775/488281 [22:28<47:32, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104803/488281 [22:28<47:31, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.2412109375, 'l2_loss': 67.35433197021484, 'l1_loss': 56.88688278198242}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 21%|██▏       | 104831/488281 [22:28<47:19, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104845/488281 [22:29<57:49, 110.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 21%|██▏       | 104873/488281 [22:29<52:21, 122.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104901/488281 [22:29<49:42, 128.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.39241027832031, 'l2_loss': 67.36369323730469, 'l1_loss': 57.028717041015625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 21%|██▏       | 104929/488281 [22:29<48:23, 132.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 21%|██▏       | 104957/488281 [22:29<47:43, 133.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 104985/488281 [22:30<47:24, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 104999/488281 [22:30<47:18, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.21157836914062, 'l2_loss': 67.39678955078125, 'l1_loss': 56.814788818359375}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.905808925628662 4.345156669616699 8.954850196838379
91.30%
Reconstruction: (0.9129839269698082, 3.905808925628662, 4.345156669616699, 8.954850196838379)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 15.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.75it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.69it/s]
 22%|██▏       | 105013/488281 [22:34<11:09:03,  9.55it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105024/488281 [22:35<8:52:13, 12.00it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105052/488281 [22:35<4:36:45, 23.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105080/488281 [22:35<2:37:46, 40.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105108/488281 [22:35<1:40:54, 63.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.71134948730469, 'l2_loss': 67.08206176757812, 'l1_loss': 56.62928771972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 105136/488281 [22:35<1:13:21, 87.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105164/488281 [22:36<59:55, 106.57it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105192/488281 [22:36<53:22, 119.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105206/488281 [22:36<51:30, 123.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.0035171508789, 'l2_loss': 66.7789306640625, 'l1_loss': 56.224586486816406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105234/488281 [22:36<59:07, 107.99it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105262/488281 [22:37<52:58, 120.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105290/488281 [22:37<49:57, 127.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105318/488281 [22:37<48:39, 131.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.8792724609375, 'l2_loss': 67.67695617675781, 'l1_loss': 57.20231628417969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 105346/488281 [22:37<47:50, 133.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105374/488281 [22:37<47:26, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105402/488281 [22:38<47:16, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.30891418457031, 'l2_loss': 67.37065887451172, 'l1_loss': 56.93825912475586}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105430/488281 [22:38<55:50, 114.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105458/488281 [22:38<51:21, 124.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105486/488281 [22:38<49:08, 129.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105514/488281 [22:38<48:06, 132.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35099792480469, 'l2_loss': 67.47510528564453, 'l1_loss': 56.87589645385742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 105542/488281 [22:39<47:32, 134.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105570/488281 [22:39<47:17, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105598/488281 [22:39<47:10, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105612/488281 [22:39<58:05, 109.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.5976791381836, 'l2_loss': 66.95840454101562, 'l1_loss': 56.63927459716797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 105640/488281 [22:39<52:26, 121.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105668/488281 [22:40<49:41, 128.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105696/488281 [22:40<48:19, 131.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.982666015625, 'l2_loss': 67.28681945800781, 'l1_loss': 56.69584655761719}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 22%|██▏       | 105724/488281 [22:40<47:39, 133.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105752/488281 [22:40<47:20, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105780/488281 [22:41<47:10, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105808/488281 [22:41<55:32, 114.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.772216796875, 'l2_loss': 66.88970947265625, 'l1_loss': 56.882511138916016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 105836/488281 [22:41<51:19, 124.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105864/488281 [22:41<49:06, 129.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105892/488281 [22:41<48:01, 132.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105920/488281 [22:42<47:31, 134.07it/s]

{'loss': 124.05691528320312, 'l2_loss': 67.36006164550781, 'l1_loss': 56.69685363769531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 105948/488281 [22:42<47:15, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 105976/488281 [22:42<47:07, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 105990/488281 [22:42<58:21, 109.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 69
acts.shape=torch.Size([4096, 4096])
{'loss': 123.77007293701172, 'l2_loss': 66.81889343261719, 'l1_loss': 56.95117950439453}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9547362327575684 4.385852813720703 9.0232


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.25it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 22%|██▏       | 106002/488281 [22:47<11:43:03,  9.06it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106016/488281 [22:47<8:20:07, 12.74it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106030/488281 [22:47<6:01:14, 17.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106044/488281 [22:47<4:25:32, 23.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106058/488281 [22:47<3:19:17, 31.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106072/488281 [22:47<2:33:15, 41.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106086/488281 [22:47<2:01:11, 52.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106100/488281 [22:48<1:38:50, 64.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.98956298828125, 'l2_loss': 67.69761657714844, 'l1_loss': 57.29194641113281}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 106114/488281 [22:48<1:23:15, 76.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106128/488281 [22:48<1:12:20, 88.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106142/488281 [22:48<1:04:43, 98.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106156/488281 [22:48<59:22, 107.25it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106170/488281 [22:48<55:38, 114.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106198/488281 [22:48<1:03:40, 100.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.190673828125, 'l2_loss': 67.5118408203125, 'l1_loss': 56.6788330078125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 22%|██▏       | 106226/488281 [22:49<55:09, 115.45it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106254/488281 [22:49<50:57, 124.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106282/488281 [22:49<48:53, 130.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106310/488281 [22:49<47:54, 132.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9784927368164, 'l2_loss': 66.9676513671875, 'l1_loss': 57.010841369628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 106338/488281 [22:49<47:36, 133.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106366/488281 [22:50<47:15, 134.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106380/488281 [22:50<59:59, 106.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106408/488281 [22:50<53:21, 119.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.21327209472656, 'l2_loss': 67.21851348876953, 'l1_loss': 56.9947624206543}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 106436/488281 [22:50<50:03, 127.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106464/488281 [22:50<48:27, 131.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106492/488281 [22:51<47:39, 133.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.20942687988281, 'l2_loss': 67.526123046875, 'l1_loss': 56.68330764770508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 22%|██▏       | 106520/488281 [22:51<47:17, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106548/488281 [22:51<47:05, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106576/488281 [22:51<54:39, 116.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106604/488281 [22:52<50:42, 125.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05130767822266, 'l2_loss': 66.876953125, 'l1_loss': 57.174354553222656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 22%|██▏       | 106632/488281 [22:52<48:45, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106660/488281 [22:52<47:47, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106688/488281 [22:52<47:18, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106716/488281 [22:52<47:06, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9291000366211, 'l2_loss': 67.32966613769531, 'l1_loss': 56.59943389892578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 106744/488281 [22:53<46:58, 135.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106772/488281 [22:53<53:59, 117.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106800/488281 [22:53<50:20, 126.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34991455078125, 'l2_loss': 67.45295715332031, 'l1_loss': 56.89695358276367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 106828/488281 [22:53<48:34, 130.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106856/488281 [22:54<47:48, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106884/488281 [22:54<47:19, 134.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 106912/488281 [22:54<47:06, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.03226470947266, 'l2_loss': 66.58145904541016, 'l1_loss': 56.4508056640625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 106940/488281 [22:54<46:58, 135.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106968/488281 [22:54<54:07, 117.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 106996/488281 [22:55<50:24, 126.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.22it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 22%|██▏       | 107023/488281 [22:59<8:04:23, 13.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107051/488281 [23:00<4:19:35, 24.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107079/488281 [23:00<2:30:41, 42.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107107/488281 [23:00<1:37:39, 65.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07173156738281, 'l2_loss': 67.371337890625, 'l1_loss': 56.70039749145508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 22%|██▏       | 107135/488281 [23:00<1:11:43, 88.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107149/488281 [23:00<1:16:28, 83.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107177/488281 [23:01<1:01:22, 103.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107205/488281 [23:01<53:59, 117.64it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23893737792969, 'l2_loss': 67.29032897949219, 'l1_loss': 56.9486083984375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 107233/488281 [23:01<50:19, 126.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107261/488281 [23:01<48:31, 130.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107289/488281 [23:01<47:39, 133.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.83917999267578,

 22%|██▏       | 107317/488281 [23:02<47:14, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107345/488281 [23:02<56:02, 113.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107373/488281 [23:02<51:20, 123.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107401/488281 [23:02<49:02, 129.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.93608093261719, 'l2_loss': 67.1159439086914, 'l1_loss': 56.820133209228516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 107429/488281 [23:03<47:53, 132.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107457/488281 [23:03<47:20, 134.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107485/488281 [23:03<47:03, 134.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107513/488281 [23:03<46:57, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.15032958984375, 'l2_loss': 66.69033813476562, 'l1_loss': 56.459991455078125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107541/488281 [23:03<55:11, 114.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107569/488281 [23:04<50:54, 124.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107597/488281 [23:04<48:48, 130.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.79397583007812,

 22%|██▏       | 107625/488281 [23:04<47:46, 132.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107653/488281 [23:04<47:15, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107681/488281 [23:04<47:00, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107709/488281 [23:05<46:54, 135.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.638427734375, 'l2_loss': 67.06684112548828, 'l1_loss': 56.57158279418945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107737/488281 [23:05<54:25, 116.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107765/488281 [23:05<50:31, 125.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107793/488281 [23:05<48:35, 130.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107821/488281 [23:06<47:39, 133.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50663757324219, 'l2_loss': 67.2340316772461, 'l1_loss': 57.27260971069336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 107849/488281 [23:06<47:11, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107863/488281 [23:06<47:02, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107891/488281 [23:06<47:09, 134.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91248321533203, 'l2_loss': 67.12860107421875, 'l1_loss': 56.78388214111328}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 107919/488281 [23:06<54:40, 115.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 107947/488281 [23:07<50:36, 125.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107975/488281 [23:07<48:37, 130.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 107989/488281 [23:07<48:03, 131.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 70
acts.shape=torch.Size([4096, 4096])
{'loss': 124.61181640625, 'l2_loss': 67.62503051757812, 'l1_loss': 56.98678970336914}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.884665012359619 4.32375955581665 8.946816444396973
91.33%
Reconstruction: (0.9132593030152972, 3.884665012359619, 4.32375955581665, 8.946816444396973)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 10.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.27it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.26it/s]
 22%|██▏       | 108003/488281 [23:12<11:20:08,  9.32it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108031/488281 [23:12<5:57:04, 17.75it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108059/488281 [23:12<3:18:46, 31.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108087/488281 [23:12<2:01:13, 52.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108115/488281 [23:13<1:30:11, 70.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.59262084960938, 'l2_loss': 66.83338165283203, 'l1_loss': 56.75924301147461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 108143/488281 [23:13<1:08:00, 93.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108171/488281 [23:13<57:07, 110.90it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108199/488281 [23:13<51:47, 122.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.69071960449219, 'l2_loss': 67.12803649902344, 'l1_loss': 56.56268310546875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 108227/488281 [23:13<49:11, 128.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108255/488281 [23:14<47:55, 132.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108283/488281 [23:14<47:17, 133.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108311/488281 [23:14<54:02, 117.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.67459106445312, 'l2_loss': 67.5064468383789, 'l1_loss': 57.16814422607422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 108339/488281 [23:14<50:17, 125.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108367/488281 [23:14<48:45, 129.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108395/488281 [23:15<47:40, 132.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108423/488281 [23:15<47:10, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.31144714355469, 'l2_loss': 66.8030014038086, 'l1_loss': 56.50844192504883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 108451/488281 [23:15<46:54, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108479/488281 [23:15<46:46, 135.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108493/488281 [23:16<1:07:01, 94.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.09487915039062, 'l2_loss': 67.18177795410156, 'l1_loss': 56.9130973815918}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 108521/488281 [23:16<56:38, 111.73it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108549/488281 [23:16<51:32, 122.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108577/488281 [23:16<49:02, 129.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108605/488281 [23:16<47:49, 132.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.03229522705078, 'l2_loss': 67.11128997802734, 'l1_loss': 56.92100524902344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 108633/488281 [23:17<47:13, 134.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108661/488281 [23:17<46:55, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108689/488281 [23:17<54:59, 115.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108717/488281 [23:17<50:44, 124.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.33036804199219, 'l2_loss': 66.69843292236328, 'l1_loss': 56.63193130493164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 108745/488281 [23:18<48:37, 130.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108773/488281 [23:18<47:35, 132.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108801/488281 [23:18<47:07, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.19903564453125, 'l2_loss': 67.85258483886719, 'l1_loss': 57.34645080566406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 108829/488281 [23:18<46:51, 134.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108857/488281 [23:18<46:44, 135.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108885/488281 [23:19<54:37, 115.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 108913/488281 [23:19<50:34, 125.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.45354461669922, 'l2_loss': 67.56074523925781, 'l1_loss': 56.892799377441406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 22%|██▏       | 108941/488281 [23:19<48:32, 130.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108969/488281 [23:19<47:32, 132.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 108997/488281 [23:19<47:03, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.32994079589844,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.25it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 22%|██▏       | 109025/488281 [23:24<7:59:13, 13.19it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109053/488281 [23:24<4:18:34, 24.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109080/488281 [23:25<2:39:06, 39.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109108/488281 [23:25<1:41:18, 62.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05914306640625, 'l2_loss': 67.44451904296875, 'l1_loss': 56.614627838134766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 22%|██▏       | 109136/488281 [23:25<1:13:17, 86.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109164/488281 [23:25<59:37, 105.97it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109192/488281 [23:26<52:57, 119.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109220/488281 [23:26<49:43, 127.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.78257751464844, 'l2_loss': 67.39833068847656, 'l1_loss': 56.38424301147461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 22%|██▏       | 109234/488281 [23:26<48:46, 129.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109262/488281 [23:26<59:43, 105.77it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109290/488281 [23:26<52:59, 119.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.2183837890625, 

 22%|██▏       | 109318/488281 [23:27<49:42, 127.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109346/488281 [23:27<48:05, 131.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109374/488281 [23:27<47:17, 133.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109402/488281 [23:27<47:09, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35911560058594, 'l2_loss': 67.6344985961914, 'l1_loss': 56.7246208190918}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 22%|██▏       | 109430/488281 [23:27<46:50, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109458/488281 [23:28<55:22, 114.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109486/488281 [23:28<50:51, 124.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109514/488281 [23:28<48:40, 129.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.22525024414062, 'l2_loss': 67.32948303222656, 'l1_loss': 56.8957633972168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 109542/488281 [23:28<47:34, 132.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109570/488281 [23:29<47:02, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109598/488281 [23:29<46:46, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35795593261719, 'l2_loss': 67.315185546875, 'l1_loss': 57.04277038574219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 22%|██▏       | 109626/488281 [23:29<46:40, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109654/488281 [23:29<54:06, 116.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109682/488281 [23:29<50:13, 125.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109710/488281 [23:30<48:21, 130.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.09974670410156, 'l2_loss': 67.1926040649414, 'l1_loss': 56.90714645385742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 22%|██▏       | 109738/488281 [23:30<47:24, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109766/488281 [23:30<46:56, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109794/488281 [23:30<46:43, 135.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 22%|██▏       | 109822/488281 [23:30<46:37, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.85224914550781, 'l2_loss': 67.27261352539062, 'l1_loss': 56.57963562011719}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 22%|██▏       | 109836/488281 [23:31<58:28, 107.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 109864/488281 [23:31<52:21, 120.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 109892/488281 [23:31<49:21, 127.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.21812438964844, 'l2_loss': 67.20211029052734, 'l1_loss': 57.016014099121094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 109920/488281 [23:31<48:04, 131.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 109948/488281 [23:31<47:16, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 109976/488281 [23:32<46:51, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 109990/488281 [23:32<46:44, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 71
acts.shape=torch.Size([4096, 4096])
{'loss': 124.18255615234375, 'l2_loss': 67.51254272460938, 'l1_loss': 56.670013427734375}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.900574207305908 4.3347883224487305 9.033395767211914
91.54%
Reconstruction: (0.9154044008592471, 3.900574207305908, 4.3347883224487305, 9.033395767211914)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.24it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.84it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 23%|██▎       | 110016/488281 [23:37<8:23:44, 12.52it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110044/488281 [23:37<4:25:31, 23.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110072/488281 [23:37<2:32:37, 41.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110100/488281 [23:37<1:38:11, 64.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.6074447631836, 'l2_loss': 66.95246887207031, 'l1_loss': 56.65497589111328}
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 23%|██▎       | 110128/488281 [23:37<1:11:46, 87.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110156/488281 [23:38<58:50, 107.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110184/488281 [23:38<52:30, 120.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110198/488281 [23:38<50:41, 124.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.60722351074219, 'l2_loss': 67.86039733886719, 'l1_loss': 56.746826171875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 23%|██▎       | 110226/488281 [23:38<55:33, 113.42it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110254/488281 [23:38<50:53, 123.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110282/488281 [23:39<48:37, 129.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110310/488281 [23:39<47:32, 132.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.297607421875, 'l2_loss': 67.12191772460938, 'l1_loss': 57.17569351196289}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 23%|██▎       | 110338/488281 [23:39<46:57, 134.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110366/488281 [23:39<46:41, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110394/488281 [23:39<46:33, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110422/488281 [23:40<53:46, 117.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89422607421875, 'l2_loss': 67.10919189453125, 'l1_loss': 56.785037994384766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 110450/488281 [23:40<50:01, 125.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110478/488281 [23:40<48:10, 130.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110506/488281 [23:40<47:18, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.52820587158203, 'l2_loss': 66.96310424804688, 'l1_loss': 56.565101623535156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 110534/488281 [23:41<46:50, 134.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110562/488281 [23:41<46:36, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110590/488281 [23:41<46:30, 135.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110604/488281 [23:41<56:43, 110.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.14735412597656, 'l2_loss': 67.30888366699219, 'l1_loss': 56.838470458984375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 110632/488281 [23:41<51:27, 122.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110660/488281 [23:42<48:52, 128.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110688/488281 [23:42<47:36, 132.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.58221435546875,

 23%|██▎       | 110716/488281 [23:42<46:59, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110744/488281 [23:42<46:40, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110772/488281 [23:42<46:31, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110800/488281 [23:43<53:29, 117.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.33645629882812, 'l2_loss': 67.51792907714844, 'l1_loss': 56.81852722167969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 110828/488281 [23:43<49:52, 126.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110856/488281 [23:43<48:05, 130.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110884/488281 [23:43<47:12, 133.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 110912/488281 [23:43<46:47, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.489501953125, 'l2_loss': 67.61094665527344, 'l1_loss': 56.87855911254883}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 23%|██▎       | 110940/488281 [23:44<46:45, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110968/488281 [23:44<46:33, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 110996/488281 [23:44<53:34, 117.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.48090362548828, 'l2_loss': 66.72389221191406, 'l1_l


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 10.11it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


[A
100%|██████████| 5/5 [00:00<00:00, 10.35it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 23%|██▎       | 111023/488281 [23:49<8:10:12, 12.83it/s] 

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 409

 23%|██▎       | 111051/488281 [23:49<4:22:09, 23.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111079/488281 [23:49<2:31:41, 41.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111107/488281 [23:50<1:37:53, 64.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.98812866210938, 'l2_loss': 67.33894348144531, 'l1_loss': 56.64918899536133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 111135/488281 [23:50<1:11:34, 87.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111163/488281 [23:50<58:42, 107.07it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111191/488281 [23:50<59:19, 105.94it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111219/488281 [23:51<52:41, 119.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.00811767578125, 'l2_loss': 67.44066619873047, 'l1_loss': 56.56745529174805}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 111247/488281 [23:51<49:26, 127.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111275/488281 [23:51<47:50, 131.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111303/488281 [23:51<47:03, 133.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.0308837890625, 'l2_loss': 67.94767761230469, 'l1_loss': 57.08320236206055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 23%|██▎       | 111331/488281 [23:51<46:41, 134.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111359/488281 [23:52<46:29, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111373/488281 [23:52<56:22, 111.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111401/488281 [23:52<51:15, 122.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.84073638916016, 'l2_loss': 67.22179412841797, 'l1_loss': 56.61894226074219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 111429/488281 [23:52<49:02, 128.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111457/488281 [23:52<47:37, 131.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111485/488281 [23:53<46:57, 133.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111513/488281 [23:53<46:37, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.40593719482422, 'l2_loss': 66.9598388671875, 'l1_loss': 56.44609832763672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 23%|██▎       | 111541/488281 [23:53<46:26, 135.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111569/488281 [23:53<53:26, 117.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111597/488281 [23:53<49:46, 126.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.6171875, 'l2_loss': 67.21063232421875, 'l1_loss': 56.406558990478516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.

 23%|██▎       | 111625/488281 [23:54<47:59, 130.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111653/488281 [23:54<47:06, 133.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111681/488281 [23:54<46:39, 134.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111709/488281 [23:54<46:28, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34575653076172, 'l2_loss': 67.14453887939453, 'l1_loss': 57.20121765136719}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 111737/488281 [23:54<46:22, 135.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111765/488281 [23:55<53:16, 117.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111793/488281 [23:55<49:41, 126.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111821/488281 [23:55<47:56, 130.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11543273925781, 'l2_loss': 67.54789733886719, 'l1_loss': 56.567535400390625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 111849/488281 [23:55<47:04, 133.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111877/488281 [23:56<46:37, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 111905/488281 [23:56<46:27, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.41799926757812, 'l2_loss': 67.47683715820312, 'l1_loss': 56.941165924072266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 111933/488281 [23:56<46:19, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111961/488281 [23:56<53:22, 117.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 111989/488281 [23:56<49:43, 126.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 17.65it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.56it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 23%|██▎       | 112002/488281 [24:01<11:11:28,  9.34it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112016/488281 [24:01<8:01:01, 13.04it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112030/488281 [24:01<5:49:11, 17.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112044/488281 [24:01<4:17:37, 24.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112058/488281 [24:02<3:13:52, 32.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112072/488281 [24:02<2:29:25, 41.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112086/488281 [24:02<1:58:22, 52.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112100/488281 [24:02<1:36:41, 64.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.54373168945312, 'l2_loss': 67.22792053222656, 'l1_loss': 56.31581115722656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 112114/488281 [24:02<1:21:33, 76.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112142/488281 [24:02<1:13:49, 84.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112170/488281 [24:02<59:43, 104.96it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112198/488281 [24:03<52:48, 118.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.58516693115234, 'l2_loss': 67.54911804199219, 'l1_loss': 57.036048889160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 112226/488281 [24:03<49:27, 126.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112254/488281 [24:03<47:47, 131.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112282/488281 [24:03<46:58, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112310/488281 [24:03<46:36, 134.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35957336425781, 'l2_loss': 67.3824462890625, 'l1_loss': 56.97712707519531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112338/488281 [24:04<55:07, 113.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112366/488281 [24:04<50:33, 123.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112394/488281 [24:04<48:18, 129.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.46183013916016,

 23%|██▎       | 112422/488281 [24:04<47:14, 132.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112450/488281 [24:05<46:56, 133.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112478/488281 [24:05<46:33, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112506/488281 [24:05<46:22, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.19536590576172, 'l2_loss': 67.47560119628906, 'l1_loss': 56.719764709472656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 112534/488281 [24:05<55:05, 113.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112562/488281 [24:06<50:32, 123.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112590/488281 [24:06<48:17, 129.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112618/488281 [24:06<47:12, 132.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.75, 'l2_loss': 67.1336669921875, 'l1_loss': 56.616329193115234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([

 23%|██▎       | 112646/488281 [24:06<46:39, 134.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112674/488281 [24:06<46:23, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112702/488281 [24:07<46:17, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.24208068847656, 'l2_loss': 67.56372833251953, 'l1_loss': 56.6783561706543}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112716/488281 [24:07<56:41, 110.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112744/488281 [24:07<51:18, 121.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112772/488281 [24:07<48:39, 128.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112800/488281 [24:07<47:21, 132.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.41073608398438, 'l2_loss': 67.55394744873047, 'l1_loss': 56.856788635253906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 112828/488281 [24:08<46:44, 133.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112856/488281 [24:08<46:24, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112884/488281 [24:08<46:16, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 112912/488281 [24:08<54:01, 115.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.92855834960938, 'l2_loss': 67.14500427246094, 'l1_loss': 56.7835578918457}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 23%|██▎       | 112940/488281 [24:08<49:59, 125.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112968/488281 [24:09<48:00, 130.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 112996/488281 [24:09<47:08, 132.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.95756530761719, 'l2_loss': 67.14566802978516, 'l1_loss': 56.811893463134766}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
ac


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.29it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 23%|██▎       | 113024/488281 [24:14<7:50:18, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113052/488281 [24:14<4:13:57, 24.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113080/488281 [24:14<2:27:56, 42.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113093/488281 [24:14<2:09:00, 48.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.99351501464844, 'l2_loss': 67.160888671875, 'l1_loss': 56.8326301574707}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113107/488281 [24:14<1:43:46, 60.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113121/488281 [24:14<1:26:17, 72.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113135/488281 [24:15<1:14:08, 84.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113149/488281 [24:15<1:05:40, 95.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113163/488281 [24:15<59:46, 104.60it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113177/488281 [24:15<55:39, 112.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113191/488281 [24:15<52:46, 118.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113205/488281 [24:15<50:47, 123.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23973083496094, 'l2_loss': 67.42337036132812, 'l1_loss': 56.81636428833008}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113219/488281 [24:15<49:22, 126.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113233/488281 [24:15<48:22, 129.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113247/488281 [24:15<47:40, 131.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113261/488281 [24:15<47:12, 132.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113275/488281 [24:16<46:52, 133.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113303/488281 [24:16<53:31, 116.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.16384887695312, 'l2_loss': 67.47593688964844, 'l1_loss': 56.68790817260742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 113331/488281 [24:16<49:42, 125.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113359/488281 [24:16<47:51, 130.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113387/488281 [24:16<46:55, 133.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113415/488281 [24:17<46:29, 134.37it/s]

{'loss': 124.2266845703125, 'l2_loss': 67.59967803955078, 'l1_loss': 56.62700271606445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 23%|██▎       | 113443/488281 [24:17<46:16, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113471/488281 [24:17<46:21, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113485/488281 [24:17<58:45, 106.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113499/488281 [24:17<54:57, 113.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113513/488281 [24:17<52:18, 119.42it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.57835388183594, 'l2_loss': 66.78404235839844, 'l1_loss': 56.794307708740234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 113527/488281 [24:18<50:25, 123.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113541/488281 [24:18<49:06, 127.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113555/488281 [24:18<48:11, 129.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113569/488281 [24:18<47:32, 131.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113583/488281 [24:18<47:04, 132.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113597/488281 [24:18<46:45, 133.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.22965240478516, 'l2_loss': 67.39683532714844, 'l1_loss': 56.83281707763672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 113611/488281 [24:18<46:33, 134.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113625/488281 [24:18<46:24, 134.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113639/488281 [24:18<46:17, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113653/488281 [24:19<46:13, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113681/488281 [24:19<53:38, 116.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113709/488281 [24:19<49:46, 125.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35743713378906, 'l2_loss': 67.24891662597656, 'l1_loss': 57.108524322509766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 113737/488281 [24:19<47:50, 130.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113765/488281 [24:19<46:54, 133.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113793/488281 [24:20<46:26, 134.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97779083251953,

 23%|██▎       | 113821/488281 [24:20<46:14, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113849/488281 [24:20<46:07, 135.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113877/488281 [24:20<53:33, 116.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 113905/488281 [24:21<49:43, 125.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.55526733398438, 'l2_loss': 67.60917663574219, 'l1_loss': 56.94608688354492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 113933/488281 [24:21<47:49, 130.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113961/488281 [24:21<46:52, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 113989/488281 [24:21<46:25, 134.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.27it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.67it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 23%|██▎       | 114017/488281 [24:26<7:50:31, 13.26it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114045/488281 [24:26<4:13:59, 24.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 114072/488281 [24:26<2:36:23, 39.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114100/488281 [24:27<1:39:39, 62.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.00119018554688, 'l2_loss': 67.43853759765625, 'l1_loss': 56.562652587890625}
acts.shape=torch.Size([4096, 4096])
acts.shape

 23%|██▎       | 114128/488281 [24:27<1:12:11, 86.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114156/488281 [24:27<58:47, 106.07it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114184/488281 [24:27<52:14, 119.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114212/488281 [24:27<49:03, 127.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.13214111328125, 'l2_loss': 66.9716567993164, 'l1_loss': 56.160484313964844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 114226/488281 [24:28<48:07, 129.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 114254/488281 [24:28<53:57, 115.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 114282/488281 [24:28<49:52, 124.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114310/488281 [24:28<47:53, 130.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.2999267578125, 'l2_loss': 67.65777587890625, 'l1_loss': 56.64215087890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 23%|██▎       | 114338/488281 [24:28<46:53, 132.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114366/488281 [24:29<46:23, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114394/488281 [24:29<46:10, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.39892578125, 'l2_loss': 66.75446319580078, 'l1_loss': 56.644466400146484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 23%|██▎       | 114422/488281 [24:29<46:04, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 114450/488281 [24:29<52:59, 117.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 114478/488281 [24:30<49:23, 126.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114506/488281 [24:30<47:51, 130.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.94562530517578, 'l2_loss': 66.58258056640625, 'l1_loss': 56.36304473876953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 114534/488281 [24:30<46:52, 132.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114562/488281 [24:30<46:22, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114590/488281 [24:30<46:08, 134.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114618/488281 [24:31<46:02, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.60153198242188, 'l2_loss': 67.21930694580078, 'l1_loss': 56.38222885131836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 114646/488281 [24:31<53:04, 117.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 23%|██▎       | 114674/488281 [24:31<49:24, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 23%|██▎       | 114702/488281 [24:31<47:38, 130.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47157287597656, 'l2_loss': 67.56561279296875, 'l1_loss': 56.90596389770508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 23%|██▎       | 114730/488281 [24:31<46:45, 133.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114758/488281 [24:32<46:18, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114786/488281 [24:32<46:06, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114814/488281 [24:32<46:02, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.81195831298828, 'l2_loss': 67.70867919921875, 'l1_loss': 57.10327911376953}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 114828/488281 [24:32<57:04, 109.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 114856/488281 [24:32<51:22, 121.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114884/488281 [24:33<48:34, 128.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114912/488281 [24:33<47:13, 131.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.67544555664062, 'l2_loss': 66.66490173339844, 'l1_loss': 56.01054763793945}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▎       | 114940/488281 [24:33<46:31, 133.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114968/488281 [24:33<46:12, 134.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 114996/488281 [24:33<46:02, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.14353942871094, 'l2_loss': 67.66923522949219, 'l1_loss': 56.47430419921875}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.89905047416687 4.33434915


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.01it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.81it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 24%|██▎       | 115010/488281 [24:38<10:58:29,  9.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 115038/488281 [24:38<5:46:02, 17.98it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115066/488281 [24:39<3:12:56, 32.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115094/488281 [24:39<1:57:55, 52.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.86048126220703, 'l2_loss': 67.24015808105469, 'l1_loss': 56.620323181152344}
acts.shape=torch.Size([4096, 4096])
acts.shape

 24%|██▎       | 115122/488281 [24:39<1:21:11, 76.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115150/488281 [24:39<1:03:10, 98.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115178/488281 [24:39<54:20, 114.42it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115192/488281 [24:40<51:48, 120.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.38418579101562, 'l2_loss': 67.63591003417969, 'l1_loss': 56.74827575683594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 115206/488281 [24:40<59:56, 103.75it/s]

acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 115220/488281 [24:40<55:43, 111.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115248/488281 [24:40<50:38, 122.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115276/488281 [24:40<48:06, 129.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115304/488281 [24:40<46:54, 132.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.90080261230469, 'l2_loss': 67.28732299804688, 'l1_loss': 56.61347579956055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▎       | 115332/488281 [24:41<46:17, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115360/488281 [24:41<45:58, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115388/488281 [24:41<45:50, 135.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 115416/488281 [24:41<52:36, 118.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.95503234863281, 'l2_loss': 67.979248046875, 'l1_loss': 56.97578048706055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 24%|██▎       | 115444/488281 [24:42<49:03, 126.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115472/488281 [24:42<47:20, 131.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115500/488281 [24:42<46:28, 133.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.22479248046875, 'l2_loss': 66.98165893554688, 'l1_loss': 56.24312973022461}
acts.shape=

 24%|██▎       | 115528/488281 [24:42<46:13, 134.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115556/488281 [24:42<45:56, 135.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115570/488281 [24:42<45:51, 135.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 115598/488281 [24:43<52:47, 117.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.94474029541016, 'l2_loss': 67.14785766601562, 'l1_loss': 56.79688262939453}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▎       | 115626/488281 [24:43<49:10, 126.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115654/488281 [24:43<47:22, 131.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115682/488281 [24:43<46:29, 133.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115710/488281 [24:44<46:05, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97085571289062, 'l2_loss': 67.38360595703125, 'l1_loss': 56.58724594116211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▎       | 115738/488281 [24:44<45:51, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115766/488281 [24:44<45:45, 135.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▎       | 115794/488281 [24:44<52:36, 118.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91156005859375, 'l2_loss': 67.32976531982422, 'l1_l

 24%|██▎       | 115822/488281 [24:44<49:03, 126.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115850/488281 [24:45<47:18, 131.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115878/488281 [24:45<46:27, 133.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115906/488281 [24:45<46:02, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.4301986694336, 'l2_loss': 67.44715881347656, 'l1_loss': 56.98303985595703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 24%|██▎       | 115934/488281 [24:45<45:49, 135.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▎       | 115962/488281 [24:45<45:44, 135.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 115990/488281 [24:46<52:34, 118.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.27it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.67it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 24%|██▍       | 116017/488281 [24:51<7:57:21, 13.00it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116045/488281 [24:51<4:15:31, 24.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116073/488281 [24:51<2:28:03, 41.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116101/488281 [24:51<1:35:43, 64.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.76737976074219, 'l2_loss': 67.25762939453125, 'l1_loss': 56.50974655151367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 116129/488281 [24:51<1:10:07, 88.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116157/488281 [24:52<57:35, 107.70it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116185/488281 [24:52<58:45, 105.56it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116213/488281 [24:52<52:03, 119.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.2040023803711, 'l2_loss': 66.66886901855469, 'l1_loss': 56.535133361816406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 116241/488281 [24:52<48:44, 127.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116269/488281 [24:52<47:07, 131.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116297/488281 [24:53<46:19, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116325/488281 [24:53<45:56, 134.93it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.82041931152344, 'l2_loss': 67.15121459960938, 'l1_loss': 56.6692008972168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 24%|██▍       | 116339/488281 [24:53<45:49, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116367/488281 [24:53<52:42, 117.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116395/488281 [24:53<49:02, 126.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.02017211914062, 'l2_loss': 67.85003662109375, 'l1_loss': 57.17013168334961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 116423/488281 [24:54<47:16, 131.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116451/488281 [24:54<46:23, 133.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116479/488281 [24:54<45:56, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116507/488281 [24:54<45:45, 135.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8540267944336, 'l2_loss': 67.132568359375, 'l1_loss': 56.721458435058594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 24%|██▍       | 116535/488281 [24:54<45:50, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116563/488281 [24:55<52:46, 117.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116591/488281 [24:55<49:04, 126.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116619/488281 [24:55<47:16, 131.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.40948486328125, 'l2_loss': 67.46029663085938, 'l1_loss': 56.94919204711914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 116647/488281 [24:55<46:21, 133.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116675/488281 [24:56<45:55, 134.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116703/488281 [24:56<45:44, 135.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.28746032714844, 'l2_loss': 67.32350158691406, 'l1_loss': 56.963958740234375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 24%|██▍       | 116731/488281 [24:56<45:37, 135.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116759/488281 [24:56<59:45, 103.62it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116787/488281 [24:57<52:28, 117.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116815/488281 [24:57<48:55, 126.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.54497528076172, 'l2_loss': 66.84600830078125, 'l1_loss': 56.69896697998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 116843/488281 [24:57<47:10, 131.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116871/488281 [24:57<46:18, 133.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116899/488281 [24:57<45:53, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.17735290527344,

 24%|██▍       | 116927/488281 [24:58<45:42, 135.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116941/488281 [24:58<56:30, 109.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 116969/488281 [24:58<50:52, 121.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 116997/488281 [24:58<48:07, 128.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.67703247070312, 'l2_loss': 66.99502563476562, 'l1_loss': 56.682003021240234}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8994107246398926 4.33768


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 17.93it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.61it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 24%|██▍       | 117025/488281 [25:03<7:46:01, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117053/488281 [25:03<4:11:41, 24.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117081/488281 [25:03<2:26:31, 42.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117109/488281 [25:04<1:35:00, 65.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.3218994140625, 'l2_loss': 67.52598571777344, 'l1_loss': 56.79591751098633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 24%|██▍       | 117123/488281 [25:04<1:30:00, 68.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117151/488281 [25:04<1:07:16, 91.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117179/488281 [25:04<56:08, 110.16it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117207/488281 [25:04<50:42, 121.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05197143554688, 'l2_loss': 67.39445495605469, 'l1_loss': 56.65751647949219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 117235/488281 [25:05<48:01, 128.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117263/488281 [25:05<46:42, 132.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117291/488281 [25:05<46:04, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117305/488281 [25:05<45:55, 134.63it/s]

{'loss': 123.16368103027344, 'l2_loss': 67.21360778808594, 'l1_loss': 55.950077056884766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117333/488281 [25:05<52:29, 117.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117361/488281 [25:06<48:53, 126.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117389/488281 [25:06<47:07, 131.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.87633514404297, 'l2_loss': 67.56287384033203, 'l1_l

 24%|██▍       | 117417/488281 [25:06<46:16, 133.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117445/488281 [25:06<45:49, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117473/488281 [25:06<45:37, 135.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117501/488281 [25:07<45:32, 135.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.04473876953125, 'l2_loss': 67.19891357421875, 'l1_loss': 55.845821380615234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117529/488281 [25:07<52:17, 118.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117543/488281 [25:07<50:13, 123.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117557/488281 [25:07<49:04, 125.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117571/488281 [25:07<47:59, 128.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117585/488281 [25:07<47:13, 130.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117599/488281 [25:07<46:40, 132.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.91380310058594, 'l2_loss': 67.08522033691406, 'l1_loss': 56.82857894897461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117613/488281 [25:07<46:19, 133.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117627/488281 [25:08<46:02, 134.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117641/488281 [25:08<45:50, 134.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117655/488281 [25:08<45:42, 135.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117669/488281 [25:08<45:37, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117683/488281 [25:08<45:33, 135.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117711/488281 [25:08<52:28, 117.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.51342010498047, 'l2_loss': 67.882568359375, 'l1_loss': 56.63085174560547}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 24%|██▍       | 117739/488281 [25:08<48:52, 126.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117767/488281 [25:09<47:05, 131.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117795/488281 [25:09<46:12, 133.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.3512191772461, 'l2_loss': 67.085693359375, 'l1_loss': 56.265525817871094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 24%|██▍       | 117823/488281 [25:09<45:48, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117851/488281 [25:09<45:35, 135.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117879/488281 [25:09<45:29, 135.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 117907/488281 [25:10<52:22, 117.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.54562377929688, 'l2_loss': 67.02545166015625, 'l1_loss': 56.52016830444336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 117935/488281 [25:10<48:47, 126.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117963/488281 [25:10<47:01, 131.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 117991/488281 [25:10<46:10, 133.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 24%|██▍       | 118019/488281 [25:15<7:45:04, 13.27it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118047/488281 [25:15<4:11:00, 24.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118075/488281 [25:16<2:26:18, 42.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118102/488281 [25:16<1:45:39, 58.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.67351531982422, 'l2_loss': 66.7158203125, 'l1_loss': 56.95769500732422}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 24%|██▍       | 118130/488281 [25:16<1:14:40, 82.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118158/488281 [25:16<59:39, 103.39it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118186/488281 [25:16<52:20, 117.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118214/488281 [25:17<48:47, 126.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.69571685791016, 'l2_loss': 66.87763977050781, 'l1_loss': 56.818077087402344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 24%|██▍       | 118242/488281 [25:17<47:01, 131.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118270/488281 [25:17<46:10, 133.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118284/488281 [25:17<56:54, 108.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118312/488281 [25:17<51:02, 120.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.70423126220703, 'l2_loss': 67.10523986816406, 'l1_loss': 56.59899139404297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 118340/488281 [25:18<48:06, 128.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118368/488281 [25:18<46:41, 132.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118396/488281 [25:18<45:58, 134.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.80368041992188, 'l2_loss': 66.6344985961914, 'l1_loss': 56.169185638427734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 118424/488281 [25:18<45:39, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118452/488281 [25:18<45:29, 135.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118480/488281 [25:19<52:17, 117.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118508/488281 [25:19<48:45, 126.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34099578857422, 'l2_loss': 67.72482299804688, 'l1_loss': 56.616172790527344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 24%|██▍       | 118536/488281 [25:19<46:59, 131.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118564/488281 [25:19<46:07, 133.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118592/488281 [25:20<45:56, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.07513427734375,

 24%|██▍       | 118620/488281 [25:20<45:37, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118648/488281 [25:20<45:27, 135.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118676/488281 [25:20<52:20, 117.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118704/488281 [25:20<48:45, 126.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.2620620727539, 'l2_loss': 66.73802185058594, 'l1_loss': 56.52404022216797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 24%|██▍       | 118732/488281 [25:21<46:58, 131.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118760/488281 [25:21<46:05, 133.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118788/488281 [25:21<45:40, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118816/488281 [25:21<45:29, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.29145812988281, 'l2_loss': 66.87452697753906, 'l1_loss': 56.416934967041016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 24%|██▍       | 118844/488281 [25:22<45:22, 135.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118872/488281 [25:22<52:11, 117.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 118900/488281 [25:22<48:39, 126.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12686157226562,

 24%|██▍       | 118928/488281 [25:22<46:55, 131.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118956/488281 [25:22<46:04, 133.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118984/488281 [25:23<45:38, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 118998/488281 [25:23<45:31, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23946380615234, 'l2_loss': 67.36141967773438, 'l1_loss': 56.87804412841797}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8213870525360107 4.271767616271973 8.895790100097656
91.12%
Reconstruction: (0.9112446213841096, 3.8213870525360107, 4.271767616271973, 8.895790100097656)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.30it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 24%|██▍       | 119026/488281 [25:27<7:41:41, 13.33it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119040/488281 [25:28<5:46:17, 17.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 119068/488281 [25:28<3:12:43, 31.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 119096/488281 [25:28<1:57:40, 52.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.21650695800781, 'l2_loss': 67.24922180175781, 'l1_loss': 56.96728515625}
acts.shape=tor

 24%|██▍       | 119124/488281 [25:28<1:20:44, 76.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119152/488281 [25:28<1:02:37, 98.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119180/488281 [25:29<53:44, 114.45it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119208/488281 [25:29<49:25, 124.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.00146484375, 'l2_loss': 67.0465316772461, 'l1_loss': 55.954933166503906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 24%|██▍       | 119222/488281 [25:29<48:10, 127.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 24%|██▍       | 119250/488281 [25:29<53:37, 114.69it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 119278/488281 [25:29<49:20, 124.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119306/488281 [25:30<47:15, 130.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.87149810791016, 'l2_loss': 67.24272155761719, 'l1_loss': 56.62877655029297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 24%|██▍       | 119334/488281 [25:30<46:12, 133.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119362/488281 [25:30<45:41, 134.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119390/488281 [25:30<45:26, 135.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119418/488281 [25:30<45:20, 135.60it/s]

{'loss': 123.81468200683594, 'l2_loss': 67.46868896484375, 'l1_loss': 56.34599304199219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 119446/488281 [25:31<52:04, 118.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 119474/488281 [25:31<48:33, 126.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119502/488281 [25:31<46:51, 131.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.32656860351562, 'l2_loss': 67.50957489013672, 'l1_loss': 56.816993713378906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 24%|██▍       | 119530/488281 [25:31<45:59, 133.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119558/488281 [25:32<45:34, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119586/488281 [25:32<45:22, 135.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 24%|██▍       | 119614/488281 [25:32<45:27, 135.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.57321166992188, 'l2_loss': 67.16378784179688, 'l1_loss': 56.409423828125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 24%|██▍       | 119628/488281 [25:32<55:22, 110.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 119656/488281 [25:32<50:10, 122.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119684/488281 [25:33<47:36, 129.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119712/488281 [25:33<46:22, 132.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.48222351074219, 'l2_loss': 67.10523986816406, 'l1_loss': 56.37698745727539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▍       | 119740/488281 [25:33<45:44, 134.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119768/488281 [25:33<45:26, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119796/488281 [25:33<45:17, 135.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.56858825683594, 'l2_loss': 67.20182037353516, 'l1_loss': 56.36676788330078}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 119824/488281 [25:34<52:08, 117.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 119852/488281 [25:34<48:33, 126.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119880/488281 [25:34<46:48, 131.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119908/488281 [25:34<45:58, 133.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.63180541992188, 'l2_loss': 66.77528381347656, 'l1_loss': 56.85651779174805}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▍       | 119936/488281 [25:35<45:32, 134.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119964/488281 [25:35<45:19, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 119992/488281 [25:35<45:13, 135.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 76



  0%|          | 0/50 [00:00<?, ?it/s][A
  4%|▍         | 2/50 [00:00<00:02, 17.88it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



  8%|▊         | 4/50 [00:00<00:03, 12.68it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 12%|█▏        | 6/50 [00:00<00:03, 11.60it/s][A

acts.shape=torch.Size([65536, 4096])



 16%|█▌        | 8/50 [00:00<00:03, 11.16it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 20%|██        | 10/50 [00:00<00:03, 10.83it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 24%|██▍       | 12/50 [00:01<00:03, 10.73it/s][A

acts.shape=torch.Size([65536, 4096])



 28%|██▊       | 14/50 [00:01<00:03, 10.67it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 32%|███▏      | 16/50 [00:01<00:03, 10.62it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 36%|███▌      | 18/50 [00:01<00:03, 10.59it/s][A

acts.shape=torch.Size([65536, 4096])



 40%|████      | 20/50 [00:01<00:02, 10.58it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 44%|████▍     | 22/50 [00:02<00:02, 10.56it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 48%|████▊     | 24/50 [00:02<00:02, 10.55it/s][A

acts.shape=torch.Size([65536, 4096])



 52%|█████▏    | 26/50 [00:02<00:02, 10.55it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 56%|█████▌    | 28/50 [00:02<00:02, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 60%|██████    | 30/50 [00:02<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])



 64%|██████▍   | 32/50 [00:02<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 68%|██████▊   | 34/50 [00:03<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 72%|███████▏  | 36/50 [00:03<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])



 76%|███████▌  | 38/50 [00:03<00:01, 10.54it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 40/50 [00:03<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 84%|████████▍ | 42/50 [00:03<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])



 88%|████████▊ | 44/50 [00:04<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 92%|█████████▏| 46/50 [00:04<00:00, 10.52it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 96%|█████████▌| 48/50 [00:04<00:00, 10.53it/s][A

acts.shape=torch.Size([65536, 4096])



100%|██████████| 50/50 [00:04<00:00, 10.69it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 




tensor(0.0002, device='cuda:0')
Resetting neurons! tensor(1, device='cuda:0')
torch.Size([4096, 512]) torch.Size([512, 4096]) torch.Size([4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.73973846435547, 'l2_loss': 67.05648040771484, 'l1_loss': 56.683258056640625}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.936630964279175 4.368429660797119 9.004344940185547
91.48%
Reconstruction: (0.9147941855892299, 3.936630964279175, 4.368429660797119, 9.004344940185547)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.33it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.66it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 25%|██▍       | 120020/488281 [25:44<15:07:13,  6.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120048/488281 [25:45<7:47:31, 13.13it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120076/488281 [25:45<4:12:05, 24.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120104/488281 [25:45<2:26:32, 41.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.87921905517578, 'l2_loss': 66.63956451416016, 'l1_loss': 56.239654541015625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 120132/488281 [25:45<1:34:48, 64.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120160/488281 [25:46<1:09:28, 88.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120188/488281 [25:46<57:02, 107.54it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 120216/488281 [25:46<57:48, 106.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.03440856933594, 'l2_loss': 66.23136901855469, 'l1_loss': 56.803035736083984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 120244/488281 [25:46<51:19, 119.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120272/488281 [25:46<48:08, 127.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120300/488281 [25:47<46:33, 131.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.33309936523438,

 25%|██▍       | 120328/488281 [25:47<45:49, 133.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120356/488281 [25:47<45:26, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120370/488281 [25:47<45:20, 135.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 120398/488281 [25:47<52:06, 117.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.31987762451172, 'l2_loss': 67.66238403320312, 'l1_loss': 56.657493591308594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 120426/488281 [25:48<48:31, 126.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120454/488281 [25:48<46:45, 131.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120482/488281 [25:48<45:53, 133.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120510/488281 [25:48<45:29, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.59748840332031, 'l2_loss': 67.3036880493164, 'l1_loss': 56.29380416870117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▍       | 120538/488281 [25:48<45:28, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120566/488281 [25:49<45:15, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 120594/488281 [25:49<52:07, 117.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.75666809082031, 'l2_loss': 67.09857940673828, 'l1_l

 25%|██▍       | 120622/488281 [25:49<48:32, 126.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120650/488281 [25:49<46:44, 131.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120678/488281 [25:50<45:51, 133.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120706/488281 [25:50<45:27, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05307006835938, 'l2_loss': 67.35810089111328, 'l1_loss': 56.69497299194336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▍       | 120734/488281 [25:50<45:14, 135.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120762/488281 [25:50<45:08, 135.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 120790/488281 [25:50<51:54, 117.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 120818/488281 [25:51<48:24, 126.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.43209075927734, 'l2_loss': 66.98003387451172, 'l1_loss': 56.452056884765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 120846/488281 [25:51<46:40, 131.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120874/488281 [25:51<45:49, 133.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120902/488281 [25:51<45:25, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.06062316894531, 'l2_loss': 67.4987564086914, 'l1_loss': 56.56187057495117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▍       | 120930/488281 [25:51<45:11, 135.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 120958/488281 [25:52<45:05, 135.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 120972/488281 [25:52<54:51, 111.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121000/488281 [25:52<49:49, 122.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.39631652832031, 'l2_loss': 67.443115234375, 'l1_loss': 56.95320129394531}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8629989624023438 4.300656795501709 8.972456932067871
91.43%
Reconstruction: (0.9143435887529152, 3.8629989624023438, 4.300656795501709, 8.972456932067871)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 13.64it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 11.57it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 25%|██▍       | 121027/488281 [25:57<7:51:22, 12.98it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121055/488281 [25:57<4:12:18, 24.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121083/488281 [25:57<2:26:11, 41.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121111/488281 [25:57<1:34:30, 64.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.39900970458984, 'l2_loss': 67.06231689453125, 'l1_loss': 56.336692810058594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 121139/488281 [25:58<1:09:13, 88.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121153/488281 [25:58<1:11:45, 85.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121181/488281 [25:58<58:06, 105.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121209/488281 [25:58<51:26, 118.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89523315429688, 'l2_loss': 67.25250244140625, 'l1_loss': 56.642730712890625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 121237/488281 [25:58<48:07, 127.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121265/488281 [25:59<46:30, 131.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121293/488281 [25:59<45:42, 133.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121321/488281 [25:59<45:21, 134.83it/s]

{'loss': 124.34742736816406, 'l2_loss': 67.81336975097656, 'l1_loss': 56.5340576171875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▍       | 121335/488281 [25:59<45:14, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121363/488281 [25:59<51:56, 117.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121391/488281 [26:00<48:22, 126.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.28699493408203, 'l2_loss': 67.24332427978516, 'l1_l

 25%|██▍       | 121419/488281 [26:00<46:38, 131.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121447/488281 [26:00<45:46, 133.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121475/488281 [26:00<45:20, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121503/488281 [26:00<45:09, 135.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.98841094970703, 'l2_loss': 67.37295532226562, 'l1_loss': 56.615455627441406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▍       | 121531/488281 [26:01<45:02, 135.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121559/488281 [26:01<59:11, 103.26it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121587/488281 [26:01<51:55, 117.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121615/488281 [26:01<48:24, 126.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05801391601562, 'l2_loss': 67.40394592285156, 'l1_loss': 56.6540641784668}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▍       | 121643/488281 [26:02<46:39, 130.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121671/488281 [26:02<45:48, 133.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121699/488281 [26:02<45:21, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13880920410156,

 25%|██▍       | 121727/488281 [26:02<45:09, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121741/488281 [26:02<55:09, 110.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121769/488281 [26:03<49:56, 122.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121797/488281 [26:03<47:23, 128.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.86958312988281, 'l2_loss': 67.27597045898438, 'l1_loss': 56.5936164855957}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▍       | 121825/488281 [26:03<46:08, 132.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121853/488281 [26:03<45:31, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121881/488281 [26:03<45:13, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121909/488281 [26:04<45:06, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.70242309570312, 'l2_loss': 67.14773559570312, 'l1_loss': 56.554691314697266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121937/488281 [26:04<51:50, 117.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 121965/488281 [26:04<48:18, 126.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 121993/488281 [26:04<46:34, 131.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 77
acts.shape=torch.Size([4096, 4096])
{'loss':


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 12.01it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.75it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.71it/s]
 25%|██▍       | 122007/488281 [26:09<10:47:57,  9.42it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▍       | 122035/488281 [26:09<5:40:24, 17.93it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▍       | 122063/488281 [26:10<3:09:52, 32.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122091/488281 [26:10<1:55:56, 52.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122105/488281 [26:10<1:34:41, 64.45it/s]

{'loss': 124.48042297363281, 'l2_loss': 67.42606353759766, 'l1_loss': 57.05436325073242}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122133/488281 [26:10<1:18:54, 77.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122161/488281 [26:10<1:01:33, 99.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122189/488281 [26:11<53:04, 114.97it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122217/488281 [26:11<48:55, 124.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.81141662597656, 'l2_loss': 67.32725524902344, 'l1_loss': 56.48416519165039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▌       | 122245/488281 [26:11<46:52, 130.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122273/488281 [26:11<45:52, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122301/488281 [26:11<45:24, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.53096008300781, 'l2_loss': 67.56182861328125, 'l1_loss': 56.96913146972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122329/488281 [26:12<53:41, 113.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122357/488281 [26:12<49:12, 123.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122385/488281 [26:12<47:00, 129.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122413/488281 [26:12<45:56, 132.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.96685028076172, 'l2_loss': 67.17214965820312, 'l1_loss': 56.794700622558594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▌       | 122441/488281 [26:12<45:23, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122469/488281 [26:13<45:08, 135.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122483/488281 [26:13<45:03, 135.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122511/488281 [26:13<52:23, 116.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.40208435058594, 'l2_loss': 66.77989959716797, 'l1_loss': 56.622188568115234}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▌       | 122539/488281 [26:13<48:33, 125.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122567/488281 [26:13<47:03, 129.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122595/488281 [26:14<45:55, 132.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89955139160156, 'l2_loss': 67.26380920410156, 'l1_loss': 56.635738372802734}
acts.shape=torch.Size([4096, 4096])
acts.shape

 25%|██▌       | 122623/488281 [26:14<45:22, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122651/488281 [26:14<45:06, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122679/488281 [26:14<44:59, 135.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122707/488281 [26:15<52:41, 115.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.2999038696289, 'l2_loss': 67.73893737792969, 'l1_loss': 56.56096649169922}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▌       | 122735/488281 [26:15<48:40, 125.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122763/488281 [26:15<46:43, 130.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122791/488281 [26:15<45:46, 133.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122819/488281 [26:15<45:17, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.63809204101562, 'l2_loss': 67.25584411621094, 'l1_loss': 56.38225173950195}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▌       | 122847/488281 [26:16<45:03, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122875/488281 [26:16<44:56, 135.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 122903/488281 [26:16<52:14, 116.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.50761413574219, 'l2_loss': 67.45111083984375, 'l1_loss': 57.05649948120117}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▌       | 122931/488281 [26:16<48:26, 125.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122959/488281 [26:17<46:35, 130.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 122987/488281 [26:17<45:40, 133.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.38it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 25%|██▌       | 123001/488281 [26:21<10:33:38,  9.61it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123015/488281 [26:21<7:36:57, 13.32it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123029/488281 [26:22<5:33:17, 18.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123043/488281 [26:22<4:06:44, 24.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123057/488281 [26:22<3:06:08, 32.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123071/488281 [26:22<2:23:56, 42.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123098/488281 [26:22<1:41:30, 59.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12400817871094, 'l2_loss': 67.38310241699219, 'l1_loss': 56.74090576171875}
acts.shape=

 25%|██▌       | 123126/488281 [26:22<1:12:22, 84.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123154/488281 [26:23<58:13, 104.50it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123182/488281 [26:23<51:21, 118.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123210/488281 [26:23<48:00, 126.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.185546875, 'l2_loss': 67.42858123779297, 'l1_loss': 56.756961822509766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 25%|██▌       | 123238/488281 [26:23<46:21, 131.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123252/488281 [26:23<45:53, 132.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123280/488281 [26:24<53:47, 113.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123308/488281 [26:24<49:12, 123.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.12095642089844, 'l2_loss': 67.29912567138672, 'l1_loss': 56.82182693481445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▌       | 123336/488281 [26:24<46:55, 129.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123364/488281 [26:24<45:48, 132.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123392/488281 [26:24<45:14, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.47950744628906,

 25%|██▌       | 123420/488281 [26:25<45:00, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123448/488281 [26:25<44:52, 135.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123476/488281 [26:25<52:29, 115.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123504/488281 [26:25<48:33, 125.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.08343505859375, 'l2_loss': 67.2413558959961, 'l1_loss': 56.84207534790039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▌       | 123532/488281 [26:26<46:35, 130.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123560/488281 [26:26<45:37, 133.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123588/488281 [26:26<45:20, 134.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123616/488281 [26:26<45:01, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.88528442382812, 'l2_loss': 68.0584716796875, 'l1_loss': 56.82680892944336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 25%|██▌       | 123644/488281 [26:26<44:53, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123672/488281 [26:27<52:10, 116.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123700/488281 [26:27<48:21, 125.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.84259033203125,

 25%|██▌       | 123728/488281 [26:27<46:30, 130.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123756/488281 [26:27<45:33, 133.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123784/488281 [26:27<45:06, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123812/488281 [26:28<44:55, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.71961212158203, 'l2_loss': 67.75929260253906, 'l1_loss': 56.96031951904297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 25%|██▌       | 123826/488281 [26:28<44:51, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123854/488281 [26:28<53:04, 114.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 123882/488281 [26:28<48:46, 124.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123910/488281 [26:28<46:43, 129.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.87167358398438, 'l2_loss': 67.68505859375, 'l1_loss': 56.18661880493164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 25%|██▌       | 123938/488281 [26:29<45:39, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123966/488281 [26:29<45:09, 134.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 123994/488281 [26:29<44:53, 135.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 78
acts.shape=torch.Size([4096, 4096])
{'loss': 123.75145721435547, 'l2_loss': 67.38636779785156, 'l1_loss': 56.365089416503906}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size(


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.23it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 25%|██▌       | 124022/488281 [26:34<7:42:53, 13.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 124033/488281 [26:34<6:07:34, 16.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124047/488281 [26:34<4:26:07, 22.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124061/488281 [26:34<3:17:30, 30.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124075/488281 [26:34<2:30:36, 40.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124089/488281 [26:34<1:58:33, 51.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.67457580566406, 'l2_loss': 66.90582275390625, 'l1_loss': 56.76874923706055}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124103/488281 [26:35<1:36:11, 63.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124117/488281 [26:35<1:20:37, 75.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124131/488281 [26:35<1:09:46, 86.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124145/488281 [26:35<1:02:11, 97.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124159/488281 [26:35<56:55, 106.62it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124173/488281 [26:35<53:13, 114.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124187/488281 [26:35<50:38, 119.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124201/488281 [26:35<48:51, 124.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.59837341308594, 'l2_loss': 67.276123046875, 'l1_loss': 56.32224655151367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124215/488281 [26:35<47:35, 127.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124229/488281 [26:36<56:34, 107.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124243/488281 [26:36<52:59, 114.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124257/488281 [26:36<50:28, 120.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124271/488281 [26:36<48:43, 124.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124285/488281 [26:36<47:29, 127.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124299/488281 [26:36<46:37, 130.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.3919677734375, 'l2_loss': 67.36178588867188, 'l1_loss': 57.030181884765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124313/488281 [26:36<46:03, 131.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124327/488281 [26:36<45:37, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124341/488281 [26:36<45:19, 133.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124355/488281 [26:37<45:06, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124369/488281 [26:37<44:58, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124383/488281 [26:37<44:51, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124397/488281 [26:37<44:47, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124411/488281 [26:37<44:47, 135.39it/s]

{'loss': 124.19427490234375, 'l2_loss': 67.44303894042969, 'l1_loss': 56.7512321472168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 25%|██▌       | 124439/488281 [26:37<51:25, 117.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 124467/488281 [26:37<47:56, 126.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 25%|██▌       | 124495/488281 [26:38<46:13, 131.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.52278137207031, 'l2_loss': 67.35738372802734, 'l1_l

 26%|██▌       | 124523/488281 [26:38<45:23, 133.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124551/488281 [26:38<44:58, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124579/488281 [26:38<44:47, 135.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124607/488281 [26:38<44:56, 134.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.37496948242188, 'l2_loss': 67.24210357666016, 'l1_loss': 56.132869720458984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.37074279785156, 'l2_loss': 66.519


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.24it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 26%|██▌       | 124635/488281 [26:43<7:41:42, 13.13it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124663/488281 [26:43<4:08:57, 24.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124691/488281 [26:44<2:24:43, 41.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124719/488281 [26:44<1:33:40, 64.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27946472167969, 'l2_loss': 67.71978759765625, 'l1_loss': 56.5596809387207}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 124747/488281 [26:44<1:08:38, 88.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124775/488281 [26:44<56:22, 107.48it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124789/488281 [26:44<52:50, 114.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 124817/488281 [26:45<55:17, 109.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.76275634765625, 'l2_loss': 67.36412048339844, 'l1_loss': 56.39863967895508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 124845/488281 [26:45<49:48, 121.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124873/488281 [26:45<47:08, 128.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124901/488281 [26:45<45:48, 132.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.31703186035156,

 26%|██▌       | 124929/488281 [26:45<45:10, 134.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124957/488281 [26:46<44:51, 134.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 124985/488281 [26:46<44:42, 135.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125013/488281 [26:46<51:29, 117.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.41044616699219, 'l2_loss': 66.80569458007812, 'l1_loss': 56.6047477722168}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 125041/488281 [26:46<47:56, 126.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125069/488281 [26:47<46:11, 131.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125097/488281 [26:47<45:21, 133.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125125/488281 [26:47<45:13, 133.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.89311218261719, 'l2_loss': 67.38284301757812, 'l1_loss': 56.5102653503418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 125153/488281 [26:47<44:51, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125181/488281 [26:47<44:41, 135.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125209/488281 [26:48<51:30, 117.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125237/488281 [26:48<47:57, 126.17it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.04765319824219, 'l2_loss': 66.91523742675781, 'l1_loss': 56.13241195678711}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 125265/488281 [26:48<46:11, 130.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125293/488281 [26:48<45:20, 133.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125321/488281 [26:49<44:56, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.46672058105469, 'l2_loss': 67.67826080322266, 'l1_loss': 56.7884635925293}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 125349/488281 [26:49<44:43, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125363/488281 [26:49<44:39, 135.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125391/488281 [26:49<51:50, 116.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125419/488281 [26:49<48:08, 125.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.43296813964844, 'l2_loss': 66.86329650878906, 'l1_loss': 56.56966781616211}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 125447/488281 [26:50<46:16, 130.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125475/488281 [26:50<45:22, 133.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125503/488281 [26:50<44:56, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.49787902832031, 'l2_loss': 67.0702896118164, 'l1_loss': 56.42758560180664}
acts.shape=t

 26%|██▌       | 125531/488281 [26:50<44:44, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125559/488281 [26:50<44:37, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125587/488281 [26:51<52:02, 116.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125601/488281 [26:51<49:46, 121.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.08293151855469, 'l2_loss': 66.88861846923828, 'l1_loss': 56.19431686401367}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8525919914245605 4.2907538414001465 8.926966667175293
91.37%
Reconstruction: (0.9136520501591141,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.41it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.89it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 26%|██▌       | 125628/488281 [26:55<7:41:42, 13.09it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125656/488281 [26:56<4:07:21, 24.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125684/488281 [26:56<2:23:30, 42.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125712/488281 [26:56<1:32:57, 65.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.300537109375, 'l2_loss': 67.59498596191406, 'l1_loss': 56.70555114746094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 26%|██▌       | 125740/488281 [26:56<1:08:12, 88.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125768/488281 [26:57<1:05:36, 92.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125796/488281 [26:57<54:50, 110.17it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125824/488281 [26:57<49:34, 121.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.48942565917969, 'l2_loss': 67.0479507446289, 'l1_loss': 56.441471099853516}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 125852/488281 [26:57<46:58, 128.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125880/488281 [26:57<45:40, 132.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125908/488281 [26:58<45:04, 134.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 125936/488281 [26:58<44:47, 134.83it/s]

{'loss': 124.35954284667969, 'l2_loss': 67.59700012207031, 'l1_loss': 56.762542724609375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▌       | 125950/488281 [26:58<44:41, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 125978/488281 [26:58<51:12, 117.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126006/488281 [26:58<47:45, 126.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35523986816406, 'l2_loss': 67.47399139404297, 'l1_l

 26%|██▌       | 126034/488281 [26:59<46:05, 130.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126062/488281 [26:59<45:14, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126090/488281 [26:59<44:50, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126118/488281 [26:59<44:40, 135.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.73161315917969, 'l2_loss': 67.07721710205078, 'l1_loss': 56.65439224243164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 126132/488281 [26:59<44:47, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126160/488281 [27:00<52:57, 113.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126188/488281 [27:00<48:37, 124.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126216/488281 [27:00<46:30, 129.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.35039520263672, 'l2_loss': 67.7164077758789, 'l1_loss': 56.63398742675781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 126244/488281 [27:00<45:27, 132.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126272/488281 [27:00<44:55, 134.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126300/488281 [27:01<44:40, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126328/488281 [27:01<44:35, 135.27it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.56519317626953, 'l2_loss': 67.53643798828125, 'l1_loss': 57.02875518798828}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126356/488281 [27:01<51:10, 117.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126384/488281 [27:01<47:43, 126.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126412/488281 [27:02<46:04, 130.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.51786804199219, 'l2_loss': 66.82984161376953, 'l1_loss': 56.68802261352539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 126440/488281 [27:02<45:14, 133.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126468/488281 [27:02<44:48, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126496/488281 [27:02<44:36, 135.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126524/488281 [27:02<44:32, 135.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.69213104248047, 'l2_loss': 67.41349029541016, 'l1_loss': 57.27864074707031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126552/488281 [27:03<51:08, 117.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126580/488281 [27:03<47:41, 126.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126608/488281 [27:03<46:00, 131.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.22it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 26%|██▌       | 126636/488281 [27:08<7:33:58, 13.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126664/488281 [27:08<4:05:04, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126692/488281 [27:08<2:22:43, 42.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126706/488281 [27:08<1:53:12, 53.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.5526351928711, 'l2_loss': 67.30380249023438, 'l1_loss': 56.24883270263672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126734/488281 [27:09<1:24:53, 70.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126762/488281 [27:09<1:04:14, 93.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126790/488281 [27:09<54:06, 111.36it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126818/488281 [27:09<49:10, 122.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.10467529296875, 'l2_loss': 67.24560546875, 'l1_loss': 56.859073638916016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 26%|██▌       | 126846/488281 [27:09<46:43, 128.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126874/488281 [27:10<45:31, 132.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126902/488281 [27:10<44:56, 134.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.75492858886719, 'l2_loss': 67.50823974609375, 'l1_loss': 56.2466926574707}


 26%|██▌       | 126930/488281 [27:10<51:24, 117.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 126958/488281 [27:10<47:49, 125.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 126986/488281 [27:10<46:02, 130.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127014/488281 [27:11<45:12, 133.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.75700378417969, 'l2_loss': 67.39324188232422, 'l1_loss': 56.3637580871582}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 127042/488281 [27:11<44:46, 134.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127070/488281 [27:11<44:33, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127098/488281 [27:11<44:27, 135.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127126/488281 [27:12<51:10, 117.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.94654846191406, 'l2_loss': 67.40814208984375, 'l1_loss': 56.53840637207031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 127154/488281 [27:12<47:52, 125.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127182/488281 [27:12<46:03, 130.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127210/488281 [27:12<45:11, 133.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.0588607788086, 'l2_loss': 67.62702941894531, 'l1_lo

 26%|██▌       | 127238/488281 [27:12<44:45, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127266/488281 [27:13<44:32, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127294/488281 [27:13<44:26, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127308/488281 [27:13<54:15, 110.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.68382263183594, 'l2_loss': 67.7765884399414, 'l1_loss': 56.9072380065918}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 26%|██▌       | 127336/488281 [27:13<49:12, 122.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127364/488281 [27:13<46:42, 128.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127392/488281 [27:14<45:29, 132.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127420/488281 [27:14<44:53, 133.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.52447509765625, 'l2_loss': 66.95294952392578, 'l1_loss': 56.571529388427734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▌       | 127448/488281 [27:14<44:36, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127476/488281 [27:14<44:26, 135.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127504/488281 [27:15<51:04, 117.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1153564453125, 'l2_loss': 67.31285095214844, 'l1_loss': 56.80250930786133}
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 127532/488281 [27:15<47:38, 126.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127560/488281 [27:15<45:56, 130.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127588/488281 [27:15<45:05, 133.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127602/488281 [27:15<44:50, 134.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.32012176513672, 'l2_loss': 67.18647766113281, 'l1_loss': 56.133644104003906}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.814903974533081 4.2598466873168945 8.924509048461914
91.29%
Reconstruction: (0.9129203321301519, 3.814903974533081, 4.2598466873168945, 8.924509048461914)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.38it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.42it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 26%|██▌       | 127630/488281 [27:20<7:32:21, 13.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127658/488281 [27:20<4:04:26, 24.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127672/488281 [27:20<3:04:25, 32.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127699/488281 [27:21<2:03:51, 48.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127727/488281 [27:21<1:23:01, 72.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.19917297363281, 'l2_loss': 66.7562026977539, 'l1_loss': 56.44296646118164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▌       | 127755/488281 [27:21<1:03:13, 95.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127783/488281 [27:21<53:36, 112.08it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127811/488281 [27:21<48:53, 122.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.47307586669922, 'l2_loss': 67.60694885253906, 'l1_loss': 56.866127014160156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▌       | 127839/488281 [27:22<46:34, 128.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127867/488281 [27:22<45:28, 132.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127895/488281 [27:22<52:14, 114.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 127923/488281 [27:22<48:15, 124.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.78254699707031, 'l2_loss': 67.31826782226562, 'l1_loss': 56.46427917480469}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 127951/488281 [27:23<46:14, 129.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 127979/488281 [27:23<45:15, 132.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 128007/488281 [27:23<44:46, 134.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 128035/488281 [27:23<44:34, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.64433288574219, 'l2_loss': 67.69703674316406, 'l1_loss': 56.947296142578125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▌       | 128063/488281 [27:23<44:26, 135.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 128077/488281 [27:24<55:10, 108.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▌       | 128105/488281 [27:24<49:39, 120.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.84126281738281, 'l2_loss': 67.30277252197266, 'l1_loss': 56.53849411010742}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▌       | 128133/488281 [27:24<46:58, 127.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▌       | 128161/488281 [27:24<45:36, 131.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128189/488281 [27:24<45:10, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128217/488281 [27:25<44:44, 134.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.81934356689453, 'l2_loss': 67.34719848632812, 'l1_loss': 56.472145080566406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▋       | 128245/488281 [27:25<44:32, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128273/488281 [27:25<51:46, 115.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128301/488281 [27:25<47:58, 125.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128329/488281 [27:26<46:06, 130.11it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.61825561523438, 'l2_loss': 67.73381042480469, 'l1_loss': 56.88444519042969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▋       | 128357/488281 [27:26<45:11, 132.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128385/488281 [27:26<44:43, 134.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128413/488281 [27:26<44:32, 134.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.93521881103516, 'l2_loss': 67.24028015136719, 'l1_loss': 56.69493865966797}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▋       | 128441/488281 [27:26<44:24, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128469/488281 [27:27<52:24, 114.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128497/488281 [27:27<48:16, 124.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128525/488281 [27:27<46:15, 129.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.29495239257812, 'l2_loss': 67.75005340576172, 'l1_loss': 56.54489517211914}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 26%|██▋       | 128553/488281 [27:27<45:14, 132.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128581/488281 [27:27<44:44, 133.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128609/488281 [27:28<44:31, 134.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 80
acts.shape=torch.Size([4096, 4096])
{'loss': 124.46768951416016, 'l2_loss': 67.58582305908203, 'l1_loss': 56.8818664


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 15.95it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.77it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.74it/s]
 26%|██▋       | 128623/488281 [27:32<10:30:24,  9.51it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])

 26%|██▋       | 128637/488281 [27:32<7:34:32, 13.19it/s] 


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128662/488281 [27:33<4:21:55, 22.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128690/488281 [27:33<2:28:36, 40.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128718/488281 [27:33<1:34:47, 63.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.734130859375, 'l2_loss': 67.42872619628906, 'l1_loss': 56.3054084777832}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 26%|██▋       | 128746/488281 [27:33<1:08:53, 86.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128774/488281 [27:34<56:17, 106.44it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128802/488281 [27:34<50:08, 119.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128830/488281 [27:34<47:09, 127.05it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.40522003173828, 'l2_loss': 66.96006774902344, 'l1_loss': 56.445152282714844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128844/488281 [27:34<56:05, 106.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 128872/488281 [27:34<50:04, 119.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128900/488281 [27:35<47:05, 127.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.83966064453125, 'l2_loss': 67.0145263671875, 'l1_loss': 56.82513427734375}
acts.shape=t

 26%|██▋       | 128928/488281 [27:35<45:38, 131.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128956/488281 [27:35<44:54, 133.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 128984/488281 [27:35<44:34, 134.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129012/488281 [27:35<44:25, 134.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05368041992188, 'l2_loss': 67.47052001953125, 'l1_loss': 56.58315658569336}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 129040/488281 [27:36<51:05, 117.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 129068/488281 [27:36<47:35, 125.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129096/488281 [27:36<45:51, 130.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129124/488281 [27:36<45:01, 132.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.09468078613281, 'l2_loss': 67.87873840332031, 'l1_loss': 56.2159423828125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 26%|██▋       | 129152/488281 [27:36<44:35, 134.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129180/488281 [27:37<44:25, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 129208/488281 [27:37<44:32, 134.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.82846069335938, 'l2_loss': 68.02765655517578, 'l1_loss': 56.800804138183594}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▋       | 129236/488281 [27:37<56:02, 106.79it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 26%|██▋       | 129264/488281 [27:37<50:00, 119.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129292/488281 [27:38<47:02, 127.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129320/488281 [27:38<45:35, 131.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.63152313232422, 'l2_loss': 67.31758117675781, 'l1_loss': 56.313941955566406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 26%|██▋       | 129348/488281 [27:38<44:52, 133.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 26%|██▋       | 129376/488281 [27:38<44:31, 134.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129404/488281 [27:38<44:21, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 129432/488281 [27:39<51:46, 115.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.12316131591797, 'l2_loss': 66.80001831054688, 'l1_loss': 56.323143005371094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 129460/488281 [27:39<47:54, 124.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129488/488281 [27:39<45:59, 130.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129516/488281 [27:39<45:05, 132.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.29000854492188, 'l2_loss': 67.20274353027344, 'l1_loss': 56.08726501464844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 129544/488281 [27:40<44:36, 134.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129572/488281 [27:40<44:21, 134.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129586/488281 [27:40<44:18, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 129600/488281 [27:40<53:59, 110.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.33224487304688, 'l2_loss': 67.51299285888672, 'l1_loss': 56.81925582885742}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.913637161254883 4.3406805992126465 8.960247993469238
91.54%
Reconstruction: (0.9153801527092619, 3.913637161254883, 4.3406805992126465, 8.960247993469238)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 12.09it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.60it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.49it/s]
 27%|██▋       | 129612/488281 [27:45<11:05:47,  8.98it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 129640/488281 [27:45<5:41:57, 17.48it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129668/488281 [27:45<3:08:29, 31.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129696/488281 [27:45<1:54:40, 52.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 129724/488281 [27:46<1:18:38, 75.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.93089294433594, 'l2_loss': 67.55215454101562, 'l1_loss': 56.37873840332031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 129752/488281 [27:46<1:01:00, 97.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129780/488281 [27:46<52:25, 113.99it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 129808/488281 [27:46<54:58, 108.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05581665039062, 'l2_loss': 67.19908142089844, 'l1_loss': 56.85673522949219}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 129836/488281 [27:46<49:25, 120.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129864/488281 [27:47<46:43, 127.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129892/488281 [27:47<45:24, 131.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129920/488281 [27:47<44:44, 133.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1678466796875, 'l2_loss': 67.16555786132812, 'l1_loss': 57.002288818359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 129948/488281 [27:47<44:24, 134.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 129976/488281 [27:48<44:16, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130004/488281 [27:48<50:56, 117.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130032/488281 [27:48<47:27, 125.81it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.09904479980469, 'l2_loss': 67.57855224609375, 'l1_loss': 56.52048873901367}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 130060/488281 [27:48<45:46, 130.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130088/488281 [27:48<44:53, 132.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130116/488281 [27:49<44:30, 134.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.92083740234375, 'l2_loss': 67.431884765625, 'l1_loss': 56.488948822021484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 27%|██▋       | 130144/488281 [27:49<44:18, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130172/488281 [27:49<44:12, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130200/488281 [27:49<50:50, 117.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130228/488281 [27:50<47:35, 125.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9285888671875, 'l2_loss': 67.44930267333984, 'l1_loss': 56.47929000854492}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 27%|██▋       | 130256/488281 [27:50<45:48, 130.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130284/488281 [27:50<44:55, 132.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130312/488281 [27:50<44:29, 134.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.79080200195312, 'l2_loss': 67.24526977539062, 'l1_l

 27%|██▋       | 130340/488281 [27:50<44:15, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130354/488281 [27:50<44:13, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130382/488281 [27:51<50:59, 116.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130410/488281 [27:51<47:27, 125.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.29169464111328, 'l2_loss': 67.00646209716797, 'l1_loss': 56.28523254394531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 130438/488281 [27:51<45:43, 130.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130466/488281 [27:51<44:52, 132.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130494/488281 [27:52<44:27, 134.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130522/488281 [27:52<44:17, 134.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.06185150146484, 'l2_loss': 67.38603210449219, 'l1_loss': 56.675819396972656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 130550/488281 [27:52<44:09, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130578/488281 [27:52<50:45, 117.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130606/488281 [27:52<47:18, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 81
acts.shape=torch.Size([4096, 4096])
{'loss': 122.80597686767578, 'l2_loss': 66.65821838378906, 'l1_loss': 56.1477584


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.22it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 27%|██▋       | 130633/488281 [27:57<7:36:12, 13.07it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130661/488281 [27:57<4:04:25, 24.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130689/488281 [27:58<2:21:50, 42.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130717/488281 [27:58<1:32:08, 64.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97046661376953, 'l2_loss': 67.21673583984375, 'l1_loss': 56.75373077392578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 130745/488281 [27:58<1:07:33, 88.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130773/488281 [27:58<1:02:12, 95.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130801/488281 [27:59<52:53, 112.64it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130829/488281 [27:59<48:22, 123.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.28001403808594, 'l2_loss': 67.0992202758789, 'l1_loss': 56.180789947509766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 130857/488281 [27:59<46:09, 129.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130885/488281 [27:59<45:02, 132.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 130913/488281 [27:59<44:32, 133.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.916015625, 'l2_loss': 67.34991455078125, 'l1_loss': 56.56610107421875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch

 27%|██▋       | 130941/488281 [28:00<44:15, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130969/488281 [28:00<50:47, 117.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 130997/488281 [28:00<47:18, 125.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131025/488281 [28:00<45:36, 130.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.37693786621094, 'l2_loss': 66.84288024902344, 'l1_loss': 56.534061431884766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 131053/488281 [28:00<44:45, 133.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131081/488281 [28:01<44:20, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131109/488281 [28:01<44:10, 134.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131123/488281 [28:01<44:07, 134.89it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.13025665283203, 'l2_loss': 67.13352966308594, 'l1_loss': 55.996726989746094}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131151/488281 [28:01<52:57, 112.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131179/488281 [28:01<48:21, 123.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131207/488281 [28:02<46:07, 129.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.83183288574219, 'l2_loss': 67.03594970703125, 'l1_loss': 56.7958869934082}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 27%|██▋       | 131235/488281 [28:02<45:11, 131.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131263/488281 [28:02<44:34, 133.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131291/488281 [28:02<44:15, 134.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131319/488281 [28:03<44:06, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.77557373046875, 'l2_loss': 66.93910217285156, 'l1_loss': 56.83647537231445}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 131347/488281 [28:03<50:48, 117.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131375/488281 [28:03<47:18, 125.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131403/488281 [28:03<45:33, 130.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131431/488281 [28:03<44:44, 132.94it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.84195709228516, 'l2_loss': 67.33969116210938, 'l1_loss': 56.50226593017578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 131459/488281 [28:04<44:18, 134.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131487/488281 [28:04<44:04, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131515/488281 [28:04<44:01, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.97191619873047, 'l2_loss': 67.9325180053711, 'l1_loss': 57.039398193359375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131543/488281 [28:04<50:39, 117.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131571/488281 [28:05<47:12, 125.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131599/488281 [28:05<45:31, 130.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.32it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.87it/s][A
 27%|██▋       | 131613/488281 [28:09<10:18:41,  9.61it/s]

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131627/488281 [28:09<7:26:14, 13.32it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131655/488281 [28:10<4:01:01, 24.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131683/488281 [28:10<2:20:28, 42.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131711/488281 [28:10<1:31:13, 65.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.38447570800781, 'l2_loss': 67.5478286743164, 'l1_loss': 56.83665084838867}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131739/488281 [28:10<1:13:43, 80.60it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131767/488281 [28:11<58:28, 101.61it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131795/488281 [28:11<51:01, 116.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131823/488281 [28:11<47:24, 125.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.36802673339844, 'l2_loss': 66.88299560546875, 'l1_loss': 56.48503112792969}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 131851/488281 [28:11<45:35, 130.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131879/488281 [28:11<44:42, 132.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131893/488281 [28:11<44:27, 133.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 131921/488281 [28:12<51:00, 116.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.57299041748047, 'l2_loss': 67.01248168945312, 'l1_loss': 56.560508728027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 131949/488281 [28:12<47:20, 125.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 131977/488281 [28:12<45:32, 130.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132005/488281 [28:12<44:41, 132.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.40513610839844, 'l2_loss': 66.97633361816406, 'l1_loss': 56.42880630493164}
acts.shape=

 27%|██▋       | 132033/488281 [28:13<44:17, 134.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132061/488281 [28:13<44:03, 134.78it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132089/488281 [28:13<43:56, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132117/488281 [28:13<50:37, 117.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8670654296875, 'l2_loss': 67.21044158935547, 'l1_loss': 56.656620025634766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 132145/488281 [28:14<47:09, 125.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132173/488281 [28:14<45:27, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132201/488281 [28:14<44:37, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132229/488281 [28:14<44:14, 134.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.88246154785156, 'l2_loss': 67.73399353027344, 'l1_loss': 57.14846420288086}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 132257/488281 [28:14<44:18, 133.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132285/488281 [28:15<44:03, 134.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132313/488281 [28:15<50:42, 116.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.61619567871094, 'l2_loss': 66.91333770751953, 'l1_loss': 56.70286178588867}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 132341/488281 [28:15<47:10, 125.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132369/488281 [28:15<45:26, 130.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132397/488281 [28:15<44:36, 132.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132425/488281 [28:16<44:11, 134.20it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.27944946289062, 'l2_loss': 67.5747299194336, 'l1_loss': 56.70471954345703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 27%|██▋       | 132453/488281 [28:16<43:59, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132467/488281 [28:16<43:55, 134.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132495/488281 [28:16<50:33, 117.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132523/488281 [28:16<47:07, 125.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.58628845214844, 'l2_loss': 67.11335754394531, 'l1_loss': 56.472930908203125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 132551/488281 [28:17<45:25, 130.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132579/488281 [28:17<44:33, 133.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132607/488281 [28:17<44:09, 134.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 82
acts.shape=torch.Size([4096, 4096])
{'loss': 123.04107666015625, 'l2_loss': 66.89480590820312, 'l1_loss': 56.14626693725586}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torc


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.17it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.84it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 27%|██▋       | 132635/488281 [28:22<7:25:46, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132663/488281 [28:22<4:00:42, 24.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132676/488281 [28:22<3:18:44, 29.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132704/488281 [28:22<1:58:51, 49.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132732/488281 [28:23<1:20:21, 73.75it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.46559143066406, 'l2_loss': 67.28521728515625, 'l1_loss': 56.18037414550781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 132760/488281 [28:23<1:01:54, 95.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132788/488281 [28:23<52:39, 112.53it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132816/488281 [28:23<48:07, 123.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.2130355834961, 'l2_loss': 66.97990417480469, 'l1_loss': 56.233131408691406}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 132844/488281 [28:23<45:52, 129.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132858/488281 [28:24<45:13, 130.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 132886/488281 [28:24<51:48, 114.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132914/488281 [28:24<47:43, 124.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.55022430419922, 'l2_loss': 66.95431518554688, 'l1_loss': 56.595909118652344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 132942/488281 [28:24<45:40, 129.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132970/488281 [28:24<44:40, 132.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 132998/488281 [28:25<44:10, 134.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133026/488281 [28:25<43:59, 134.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.98602294921875, 'l2_loss': 66.5997543334961, 'l1_loss': 56.38626480102539}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 27%|██▋       | 133054/488281 [28:25<43:51, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133068/488281 [28:25<54:06, 109.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133096/488281 [28:25<48:48, 121.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133124/488281 [28:26<46:13, 128.07it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.32088470458984, 'l2_loss': 67.00051879882812, 'l1_loss': 56.32036590576172}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 133152/488281 [28:26<44:55, 131.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133180/488281 [28:26<44:17, 133.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133208/488281 [28:26<43:58, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.34461212158203, 'l2_loss': 67.65499877929688, 'l1_loss': 56.689613342285156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 133236/488281 [28:27<43:50, 134.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133264/488281 [28:27<50:52, 116.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133292/488281 [28:27<47:22, 124.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133320/488281 [28:27<45:29, 130.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.80912780761719, 'l2_loss': 67.58277130126953, 'l1_loss': 56.22635269165039}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 133348/488281 [28:27<44:34, 132.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133376/488281 [28:28<44:06, 134.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133404/488281 [28:28<43:54, 134.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.48432922363281, 'l2_loss': 66.97169494628906, 'l1_loss': 56.512630462646484}
acts.shape

 27%|██▋       | 133432/488281 [28:28<43:47, 135.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133460/488281 [28:28<50:59, 115.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133488/488281 [28:29<47:14, 125.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133516/488281 [28:29<45:26, 130.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8742446899414, 'l2_loss': 67.28855895996094, 'l1_loss': 56.58568572998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 27%|██▋       | 133544/488281 [28:29<44:31, 132.79it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133572/488281 [28:29<44:04, 134.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133600/488281 [28:29<43:50, 134.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 10.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 10.34it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 10.31it/s]
 27%|██▋       | 133614/488281 [28:34<10:27:13,  9.42it/s]

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133639/488281 [28:34<5:59:45, 16.43it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133667/488281 [28:35<3:13:17, 30.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133695/488281 [28:35<1:55:45, 51.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133723/488281 [28:35<1:18:43, 75.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.1665267944336, 'l2_loss': 67.09593200683594, 'l1_loss': 56.070594787597656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 133751/488281 [28:35<1:00:44, 97.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133779/488281 [28:35<52:12, 113.16it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133807/488281 [28:36<47:51, 123.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133821/488281 [28:36<46:37, 126.73it/s]

{'loss': 124.31727600097656, 'l2_loss': 67.6283950805664, 'l1_loss': 56.688880920410156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 133849/488281 [28:36<52:00, 113.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133877/488281 [28:36<47:43, 123.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133905/488281 [28:36<45:36, 129.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.42259979248047,

 27%|██▋       | 133933/488281 [28:37<44:36, 132.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133961/488281 [28:37<44:05, 133.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 133989/488281 [28:37<43:50, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 134003/488281 [28:37<43:47, 134.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.80184936523438, 'l2_loss': 67.16748046875, 'l1_loss': 56.63437271118164}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 134031/488281 [28:37<50:18, 117.36it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 134059/488281 [28:38<46:51, 125.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 134087/488281 [28:38<45:10, 130.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 134115/488281 [28:38<44:22, 133.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.48922729492188, 'l2_loss': 67.12612915039062, 'l1_loss': 56.363094329833984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 27%|██▋       | 134143/488281 [28:38<43:56, 134.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 134171/488281 [28:38<43:45, 134.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 27%|██▋       | 134199/488281 [28:39<43:39, 135.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 27%|██▋       | 134227/488281 [28:39<50:12, 117.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.01953887939453, 'l2_loss': 67.48255920410156, 'l1_loss': 56.53697967529297}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 27%|██▋       | 134255/488281 [28:39<46:49, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134283/488281 [28:39<45:08, 130.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134311/488281 [28:40<44:31, 132.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.24235534667969, 'l2_loss': 67.51742553710938, 'l1_loss': 56.72492980957031}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 134339/488281 [28:40<44:01, 134.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134367/488281 [28:40<43:46, 134.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134395/488281 [28:40<43:40, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 134423/488281 [28:40<50:23, 117.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.06565856933594, 'l2_loss': 67.34452819824219, 'l1_loss': 56.721134185791016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 134451/488281 [28:41<46:53, 125.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134479/488281 [28:41<45:10, 130.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134507/488281 [28:41<44:19, 133.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134535/488281 [28:41<43:55, 134.24it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.87710571289062, 'l2_loss': 67.45765686035156, 'l1_loss': 56.4194450378418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 28%|██▊       | 134563/488281 [28:41<43:42, 134.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134591/488281 [28:42<43:37, 135.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 134605/488281 [28:42<53:02, 111.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 83
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56185150146484, 'l2_loss': 67.57954406738281, 'l1_loss': 56.98230743408203}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.38it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 28%|██▊       | 134631/488281 [28:47<7:40:49, 12.79it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134659/488281 [28:47<4:04:51, 24.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134687/488281 [28:47<2:21:25, 41.67it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134715/488281 [28:47<1:31:21, 64.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.98890686035156, 'l2_loss': 67.39985656738281, 'l1_loss': 56.58905029296875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 134743/488281 [28:47<1:06:56, 88.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134771/488281 [28:48<55:00, 107.10it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134785/488281 [28:48<1:00:48, 96.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 134813/488281 [28:48<52:15, 112.72it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56031799316406, 'l2_loss': 67.53325653076172, 'l1_loss': 57.02706527709961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 134841/488281 [28:48<47:48, 123.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134869/488281 [28:48<45:37, 129.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134897/488281 [28:49<44:33, 132.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134925/488281 [28:49<44:01, 133.76it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.70088195800781, 'l2_loss': 67.95457458496094, 'l1_loss': 56.746307373046875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 134953/488281 [28:49<43:45, 134.56it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 134967/488281 [28:49<43:41, 134.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 134981/488281 [28:49<53:29, 110.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 134995/488281 [28:49<50:29, 116.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135009/488281 [28:50<48:22, 121.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135023/488281 [28:50<46:57, 125.39it/s]

{'loss': 124.64640808105469, 'l2_loss': 68.14806365966797, 'l1_loss': 56.498348236083984}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 135037/488281 [28:50<45:54, 128.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135051/488281 [28:50<45:10, 130.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135065/488281 [28:50<44:40, 131.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135079/488281 [28:50<44:18, 132.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135093/488281 [28:50<44:03, 133.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135107/488281 [28:50<43:52, 134.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23724365234375, 'l2_loss': 67.55585479736328, 'l1_loss': 56.681392669677734}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 135121/488281 [28:50<43:46, 134.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135135/488281 [28:50<43:41, 134.69it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135149/488281 [28:51<43:37, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135163/488281 [28:51<43:34, 135.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135191/488281 [28:51<50:01, 117.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135219/488281 [28:51<46:42, 125.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.30692291259766, 'l2_loss': 67.73065948486328, 'l1_loss': 56.576263427734375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 135247/488281 [28:51<45:02, 130.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135275/488281 [28:52<44:13, 133.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135303/488281 [28:52<43:50, 134.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135331/488281 [28:52<43:43, 134.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.38513946533203, 'l2_loss': 66.72564697265625, 'l1_loss': 56.65949249267578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 135359/488281 [28:52<43:33, 135.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135373/488281 [28:52<53:00, 110.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135401/488281 [28:53<48:02, 122.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.36935424804688, 'l2_loss': 67.5014877319336, 'l1_loss': 56.86787033081055}
acts.shape=t

 28%|██▊       | 135429/488281 [28:53<45:39, 128.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135457/488281 [28:53<44:27, 132.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135485/488281 [28:53<43:51, 134.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135513/488281 [28:53<43:34, 134.92it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 125.01428985595703, 'l2_loss': 67.87035369873047, 'l1_loss': 57.14393615722656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 135541/488281 [28:54<43:25, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135569/488281 [28:54<52:44, 111.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135597/488281 [28:54<47:54, 122.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 16.55it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



 80%|████████  | 4/5 [00:00<00:00, 11.96it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])


100%|██████████| 5/5 [00:00<00:00, 11.87it/s]
 28%|██▊       | 135625/488281 [28:59<7:25:25, 13.20it/s] 

Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135653/488281 [28:59<4:00:20, 24.45it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135681/488281 [28:59<2:19:48, 42.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135709/488281 [28:59<1:30:35, 64.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135737/488281 [29:00<1:06:29, 88.37it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 124.1921615600586, 'l2_loss': 67.51261901855469, 'l1_loss': 56.679542541503906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 135765/488281 [29:00<1:07:05, 87.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135793/488281 [29:00<54:55, 106.95it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135821/488281 [29:00<49:18, 119.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.02078247070312, 'l2_loss': 67.55401611328125, 'l1_loss': 56.46676254272461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 135849/488281 [29:01<46:12, 127.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135877/488281 [29:01<44:41, 131.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 135905/488281 [29:01<43:58, 133.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.84062194824219, 'l2_loss': 67.2495346069336, 'l1_loss': 56.59108352661133}
acts.shape=t

 28%|██▊       | 135933/488281 [29:01<43:36, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135961/488281 [29:02<51:36, 113.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 135989/488281 [29:02<47:21, 123.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136017/488281 [29:02<45:27, 129.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9632568359375, 'l2_loss': 67.20083618164062, 'l1_loss': 56.762420654296875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 136031/488281 [29:02<45:02, 130.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136059/488281 [29:02<44:11, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136087/488281 [29:02<43:40, 134.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136115/488281 [29:03<43:28, 135.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.22853088378906, 'l2_loss': 67.21961212158203, 'l1_loss': 56.00891876220703}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 136143/488281 [29:03<50:08, 117.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 136171/488281 [29:03<46:35, 125.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136199/488281 [29:03<44:51, 130.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136227/488281 [29:04<44:00, 133.31it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.72824096679688, 'l2_loss': 67.33242797851562, 'l1_loss': 56.39581298828125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 136255/488281 [29:04<43:35, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136283/488281 [29:04<43:23, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136311/488281 [29:04<43:19, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.97036743164062, 'l2_loss': 67.57899475097656, 'l1_loss': 56.39137649536133}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 136339/488281 [29:05<50:29, 116.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 136367/488281 [29:05<46:45, 125.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136395/488281 [29:05<44:55, 130.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136423/488281 [29:05<44:04, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.9549560546875, 'l2_loss': 67.5704345703125, 'l1_loss': 56.384521484375}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torc

 28%|██▊       | 136451/488281 [29:05<43:37, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136479/488281 [29:06<43:24, 135.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136507/488281 [29:06<43:17, 135.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.08145141601562, 'l2_loss': 67.46332550048828, 'l1_loss': 56.61812973022461}


 28%|██▊       | 136535/488281 [29:06<49:51, 117.59it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 136563/488281 [29:06<46:25, 126.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136591/488281 [29:06<44:45, 130.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136605/488281 [29:07<44:17, 132.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 84
acts.shape=torch.Size([4096, 4096])
{'loss': 124.09696960449219, 'l2_loss': 67.66853332519531, 'l1_loss': 56.42844009399414}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.9058804512023926 4.333981990814209 8.951322555541992
91.52%
Reconstruction: (


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.31it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 28%|██▊       | 136633/488281 [29:11<7:21:07, 13.29it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136661/488281 [29:11<3:58:08, 24.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136689/488281 [29:12<2:18:42, 42.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136703/488281 [29:12<1:50:02, 53.25it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.1620864868164, 'l2_loss': 67.30229187011719, 'l1_loss': 56.85979461669922}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 136731/488281 [29:12<1:22:16, 71.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136759/488281 [29:12<1:02:19, 94.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136787/488281 [29:12<52:32, 111.50it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136815/488281 [29:13<47:46, 122.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.55799865722656, 'l2_loss': 67.16090393066406, 'l1_loss': 56.3970947265625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 28%|██▊       | 136843/488281 [29:13<45:42, 128.15it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136871/488281 [29:13<44:22, 131.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136899/488281 [29:13<53:21, 109.75it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 136927/488281 [29:14<48:09, 121.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.41342163085938, 'l2_loss': 67.1773452758789, 'l1_loss': 56.23607635498047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 28%|██▊       | 136955/488281 [29:14<45:36, 128.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 136983/488281 [29:14<44:19, 132.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137011/488281 [29:14<43:43, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.7105712890625, 'l2_loss': 67.51708221435547, 'l1_lo

 28%|██▊       | 137039/488281 [29:14<43:22, 134.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137067/488281 [29:15<43:14, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137081/488281 [29:15<43:12, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137109/488281 [29:15<51:48, 112.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.04122924804688, 'l2_loss': 67.29512023925781, 'l1_loss': 56.74611282348633}
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 137137/488281 [29:15<47:23, 123.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137165/488281 [29:15<45:12, 129.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137193/488281 [29:16<44:07, 132.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137221/488281 [29:16<43:35, 134.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.52577209472656, 'l2_loss': 67.00865936279297, 'l1_loss': 56.51710891723633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 137249/488281 [29:16<43:20, 134.97it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137277/488281 [29:16<43:12, 135.37it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137305/488281 [29:17<49:58, 117.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137333/488281 [29:17<46:27, 125.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.00982666015625, 'l2_loss': 67.09194946289062, 'l1_loss': 55.91787338256836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 137347/488281 [29:17<45:26, 128.72it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137375/488281 [29:17<44:28, 131.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137403/488281 [29:17<43:45, 133.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.6834487915039, 'l2_loss': 67.1974105834961, 'l1_loss': 56.48603820800781}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 28%|██▊       | 137431/488281 [29:17<43:25, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137459/488281 [29:18<43:14, 135.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137487/488281 [29:18<50:11, 116.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137515/488281 [29:18<46:34, 125.53it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.69549560546875, 'l2_loss': 67.18717956542969, 'l1_loss': 56.50831985473633}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 137543/488281 [29:18<44:45, 130.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137571/488281 [29:19<43:50, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137599/488281 [29:19<43:25, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.22it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.85it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 28%|██▊       | 137627/488281 [29:24<7:18:25, 13.33it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137655/488281 [29:24<3:56:47, 24.68it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137682/488281 [29:24<2:25:53, 40.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137710/488281 [29:24<1:33:00, 62.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.11740112304688, 'l2_loss': 67.381103515625, 'l1_loss': 56.73629379272461}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 28%|██▊       | 137738/488281 [29:24<1:07:26, 86.63it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137766/488281 [29:25<54:58, 106.28it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137794/488281 [29:25<48:51, 119.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137822/488281 [29:25<45:54, 127.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.56204223632812, 'l2_loss': 67.71968078613281, 'l1_loss': 56.84236526489258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 137850/488281 [29:25<44:27, 131.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137878/488281 [29:26<50:28, 115.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 137906/488281 [29:26<46:40, 125.13it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137934/488281 [29:26<44:47, 130.34it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.58424377441406, 'l2_loss': 67.01801300048828, 'l1_loss': 56.56623077392578}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 137962/488281 [29:26<43:52, 133.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 137990/488281 [29:26<43:24, 134.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138018/488281 [29:27<43:14, 135.02it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.65704345703125, 'l2_loss': 67.52851867675781, 'l1_loss': 57.12852478027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 138046/488281 [29:27<43:05, 135.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138060/488281 [29:27<56:20, 103.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138088/488281 [29:27<49:30, 117.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138116/488281 [29:27<46:12, 126.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.02030181884766, 'l2_loss': 67.24702453613281, 'l1_loss': 56.773277282714844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 138144/488281 [29:28<44:32, 130.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138172/488281 [29:28<43:44, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138200/488281 [29:28<43:20, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.68807983398438, 'l2_loss': 67.38934326171875, 'l1_loss': 56.29873275756836}
acts.shape=

 28%|██▊       | 138228/488281 [29:28<43:10, 135.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138256/488281 [29:28<50:48, 114.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138284/488281 [29:29<46:47, 124.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138312/488281 [29:29<44:51, 130.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.13595581054688, 'l2_loss': 67.58565521240234, 'l1_loss': 56.5503044128418}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 28%|██▊       | 138340/488281 [29:29<43:54, 132.85it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138368/488281 [29:29<43:23, 134.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138396/488281 [29:30<43:17, 134.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138424/488281 [29:30<43:09, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.063232421875, 'l2_loss': 67.22708129882812, 'l1_loss': 56.83614730834961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138452/488281 [29:30<50:04, 116.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138480/488281 [29:30<46:27, 125.50it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138508/488281 [29:30<44:38, 130.58it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.17874145507812, 'l2_loss': 66.82589721679688, 'l1_loss': 56.352848052978516}
acts.shape

 28%|██▊       | 138536/488281 [29:31<43:47, 133.11it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138564/488281 [29:31<43:20, 134.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138592/488281 [29:31<43:07, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138606/488281 [29:31<43:04, 135.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 85
acts.shape=torch.Size([4096, 4096])
{'loss': 123.7066650390625, 'l2_loss': 67.22273254394531, 'l1_loss': 56.48392868041992}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.79060435295105 4.232203960418701 8.858241081237793
91.29%
Reconstruction: (0.9128588667370902, 3.79060435295105, 4.232203960418701, 8.858241081237793)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 17.41it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.50it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 28%|██▊       | 138620/488281 [29:36<10:14:14,  9.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138630/488281 [29:36<8:10:09, 11.89it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138644/488281 [29:36<5:47:23, 16.77it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138658/488281 [29:36<4:12:01, 23.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138672/488281 [29:36<3:07:23, 31.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138686/488281 [29:36<2:23:07, 40.71it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138700/488281 [29:36<1:52:37, 51.73it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.77119445800781, 'l2_loss': 66.56356048583984, 'l1_loss': 56.207637786865234}
acts.shape=torch.Size([4096, 4096])
acts.shape

 28%|██▊       | 138714/488281 [29:37<1:31:30, 63.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138728/488281 [29:37<1:16:50, 75.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138742/488281 [29:37<1:06:35, 87.48it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138756/488281 [29:37<59:27, 97.99it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138770/488281 [29:37<54:27, 106.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138784/488281 [29:37<50:58, 114.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138798/488281 [29:37<48:33, 119.96it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138812/488281 [29:37<46:52, 124.27it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.94158172607422, 'l2_loss': 67.57247924804688, 'l1_loss': 56.369102478027344}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138840/488281 [29:38<51:14, 113.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138868/488281 [29:38<46:57, 124.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138896/488281 [29:38<45:02, 129.26it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 138924/488281 [29:38<43:59, 132.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.6917953491211, 'l2_loss': 67.0251693725586, 'l1_loss': 56.6666259765625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 28%|██▊       | 138952/488281 [29:38<43:25, 134.07it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138980/488281 [29:39<43:09, 134.90it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 138994/488281 [29:39<43:05, 135.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 28%|██▊       | 139022/488281 [29:39<49:35, 117.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.0393295288086, 'l2_loss': 67.46348571777344, 'l1_loss': 56.575843811035156}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 139050/488281 [29:39<46:09, 126.09it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 139078/488281 [29:39<44:28, 130.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 28%|██▊       | 139106/488281 [29:40<43:38, 133.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.70440673828125, 'l2_loss': 68.25479125976562, 'l1_loss': 56.44961166381836}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 28%|██▊       | 139134/488281 [29:40<43:14, 134.55it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139162/488281 [29:40<43:01, 135.23it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139190/488281 [29:40<42:56, 135.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139218/488281 [29:41<49:23, 117.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.95677185058594, 'l2_loss': 67.4813232421875, 'l1_loss': 56.4754524230957}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=to

 29%|██▊       | 139246/488281 [29:41<46:01, 126.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139274/488281 [29:41<44:22, 131.06it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139302/488281 [29:41<43:35, 133.42it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139330/488281 [29:41<43:13, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.13194274902344, 'l2_loss': 66.82754516601562, 'l1_loss': 56.30440139770508}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▊       | 139358/488281 [29:42<43:00, 135.22it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139386/488281 [29:42<42:55, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139414/488281 [29:42<49:29, 117.49it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.29552459716797, 'l2_loss': 67.26905822753906, 'l1_loss': 56.026466369628906}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▊       | 139442/488281 [29:42<46:05, 126.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139470/488281 [29:42<44:25, 130.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139498/488281 [29:43<43:34, 133.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139526/488281 [29:43<43:11, 134.57it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.23898315429688, 'l2_loss': 67.42312622070312, 'l1_loss': 56.815860748291016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▊       | 139554/488281 [29:43<43:00, 135.14it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139582/488281 [29:43<42:55, 135.38it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139596/488281 [29:43<52:16, 111.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139610/488281 [29:44<49:25, 117.59it/s]

acts.shape=torch.Size([4096, 4096])
{'loss': 123.57494354248047, 'l2_loss': 67.10478210449219, 'l1_loss': 56.47016143798828}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.82081937789917 4.249644756317139 8.873575210571289
91.51%
Reconstruction: (0.9151303976247775, 3.82081937789917, 4.249644756317139, 8.873575210571289)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.13it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.83it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 29%|██▊       | 139637/488281 [29:48<7:24:40, 13.07it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139665/488281 [29:48<3:58:11, 24.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139693/488281 [29:49<2:18:09, 42.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139721/488281 [29:49<1:29:26, 64.95it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.49452209472656, 'l2_loss': 66.87373352050781, 'l1_loss': 56.620784759521484}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▊       | 139749/488281 [29:49<1:05:37, 88.51it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139763/488281 [29:49<58:46, 98.83it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139791/488281 [29:49<56:59, 101.90it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139819/488281 [29:50<50:06, 115.89it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.59923553466797, 'l2_loss': 67.30569458007812, 'l1_loss': 56.293540954589844}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▊       | 139847/488281 [29:50<46:23, 125.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139875/488281 [29:50<44:32, 130.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139903/488281 [29:50<43:52, 132.35it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.15226745605469, 'l2_loss': 66.99413299560547, 'l1_l

 29%|██▊       | 139931/488281 [29:51<43:19, 134.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 139959/488281 [29:51<43:01, 134.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 139987/488281 [29:51<50:49, 114.21it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 140015/488281 [29:51<46:44, 124.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.59028625488281, 'l2_loss': 67.26322937011719, 'l1_loss': 56.32706069946289}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▊       | 140043/488281 [29:51<44:41, 129.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 140071/488281 [29:52<43:41, 132.82it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 140099/488281 [29:52<43:10, 134.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 140127/488281 [29:52<42:57, 135.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.86537170410156, 'l2_loss': 68.17326354980469, 'l1_loss': 56.69210433959961}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▊       | 140155/488281 [29:52<42:49, 135.47it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 140183/488281 [29:53<49:15, 117.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 140211/488281 [29:53<45:54, 126.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05270385742188, 'l2_loss': 67.4815673828125, 'l1_loss': 56.57113265991211}
acts.shape=t

 29%|██▊       | 140239/488281 [29:53<44:16, 131.00it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 140267/488281 [29:53<43:27, 133.44it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 140295/488281 [29:53<43:04, 134.66it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▊       | 140323/488281 [29:54<42:54, 135.17it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.08490753173828, 'l2_loss': 67.43373107910156, 'l1_loss': 56.65117645263672}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▊       | 140351/488281 [29:54<42:47, 135.52it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▊       | 140365/488281 [29:54<52:18, 110.87it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 140393/488281 [29:54<47:22, 122.40it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140421/488281 [29:54<45:09, 128.39it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.32331848144531, 'l2_loss': 66.96942138671875, 'l1_loss': 56.35389709472656}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▉       | 140449/488281 [29:55<43:53, 132.08it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140477/488281 [29:55<43:16, 133.94it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140505/488281 [29:55<42:59, 134.81it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.86087036132812, 'l2_loss': 67.33700561523438, 'l1_loss': 56.523868560791016}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▉       | 140533/488281 [29:55<42:50, 135.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 140561/488281 [29:56<52:04, 111.28it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 140589/488281 [29:56<47:15, 122.61it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140603/488281 [29:56<45:52, 126.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 86
acts.shape=torch.Size([4096, 4096])
{'loss': 123.76698303222656, 'l2_loss': 67.09295654296875, 'l1_loss': 56.67402267456055}
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
acts.shape=torch.Size([64, 1024, 4096])
3.8709373474121094 4.303605079650879 8.957223892211914
91.49%
Reconstruction: (0.9149344559281414, 3.8709373474121094, 4.303605079650879, 8.957223892211914)



  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 19.31it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.86it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])



 29%|██▉       | 140631/488281 [30:01<7:21:42, 13.12it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140659/488281 [30:01<3:58:11, 24.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140687/488281 [30:01<2:18:28, 41.84it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140715/488281 [30:01<1:29:37, 64.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.37767028808594, 'l2_loss': 67.46768188476562, 'l1_loss': 56.90998458862305}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▉       | 140743/488281 [30:01<1:14:41, 77.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 140771/488281 [30:02<58:20, 99.27it/s]  

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140799/488281 [30:02<50:20, 115.04it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140827/488281 [30:02<46:25, 124.76it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.22989654541016, 'l2_loss': 67.10905456542969, 'l1_loss': 56.12084197998047}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▉       | 140855/488281 [30:02<44:28, 130.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140883/488281 [30:03<43:31, 133.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140911/488281 [30:03<43:06, 134.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.8038101196289, 

 29%|██▉       | 140925/488281 [30:03<43:12, 133.99it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 140953/488281 [30:03<49:23, 117.19it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 140981/488281 [30:03<45:56, 126.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141009/488281 [30:04<44:15, 130.80it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.88319396972656, 'l2_loss': 67.11509704589844, 'l1_l

 29%|██▉       | 141037/488281 [30:04<43:25, 133.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141065/488281 [30:04<42:59, 134.62it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141093/488281 [30:04<42:48, 135.18it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141107/488281 [30:04<42:45, 135.31it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.45600891113281, 'l2_loss': 67.28340148925781, 'l1_loss': 56.172611236572266}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141135/488281 [30:05<49:08, 117.74it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141163/488281 [30:05<45:47, 126.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141191/488281 [30:05<44:09, 131.03it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141219/488281 [30:05<43:23, 133.30it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.14595031738281, 'l2_loss': 67.73921203613281, 'l1_loss': 56.40673828125}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=tor

 29%|██▉       | 141247/488281 [30:05<42:59, 134.54it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141275/488281 [30:06<42:47, 135.16it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141303/488281 [30:06<42:41, 135.46it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.68617248535156, 'l2_loss': 67.06277465820312, 'l1_loss': 56.62339782714844}


 29%|██▉       | 141331/488281 [30:06<49:03, 117.86it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141359/488281 [30:06<45:44, 126.41it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141387/488281 [30:06<44:06, 131.05it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141415/488281 [30:07<43:19, 133.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 124.05661010742188, 'l2_loss': 67.50271606445312, 'l1_loss': 56.55389404296875}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▉       | 141443/488281 [30:07<43:10, 133.91it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141471/488281 [30:07<42:52, 134.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141499/488281 [30:07<42:42, 135.33it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141527/488281 [30:08<55:05, 104.90it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.88352966308594, 'l2_loss': 67.49522399902344, 'l1_loss': 56.3883056640625}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=t

 29%|██▉       | 141555/488281 [30:08<48:40, 118.70it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141583/488281 [30:08<45:32, 126.88it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141597/488281 [30:08<44:38, 129.43it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.92022705078125, 'l2_loss': 67.45571899414062, 'l1_loss': 56.46451187133789}
acts.shape=


  0%|          | 0/5 [00:00<?, ?it/s][A
 40%|████      | 2/5 [00:00<00:00, 18.39it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])



100%|██████████| 5/5 [00:00<00:00, 12.70it/s][A

acts.shape=torch.Size([65536, 4096])
acts.shape=torch.Size([65536, 4096])
Num dead 


 29%|██▉       | 141611/488281 [30:13<10:02:17,  9.59it/s]

tensor(0.0002, device='cuda:0')
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141625/488281 [30:13<7:14:20, 13.30it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141639/488281 [30:13<5:16:46, 18.24it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141653/488281 [30:13<3:54:30, 24.64it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141667/488281 [30:13<2:56:55, 32.65it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141681/488281 [30:13<2:16:35, 42.29it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141709/488281 [30:14<1:37:42, 59.12it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.75762939453125, 'l2_loss': 66.75755310058594, 'l1_loss': 56.00008010864258}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=

 29%|██▉       | 141737/488281 [30:14<1:09:34, 83.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141765/488281 [30:14<55:46, 103.53it/s] 

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141793/488281 [30:14<49:00, 117.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141821/488281 [30:14<45:42, 126.32it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.95553588867188, 'l2_loss': 66.76448822021484, 'l1_loss': 56.191043853759766}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▉       | 141849/488281 [30:15<44:05, 130.93it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141877/488281 [30:15<43:17, 133.34it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141905/488281 [30:15<49:20, 117.01it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 122.85999298095703, 'l2_loss': 66.76725769042969, 'l1_loss': 56.092735290527344}
acts.shape=torch.Size([4096, 4096])
acts.shape

 29%|██▉       | 141933/488281 [30:15<45:52, 125.83it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])


 29%|██▉       | 141961/488281 [30:15<44:21, 130.10it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 141989/488281 [30:16<43:24, 132.98it/s]

acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096,

 29%|██▉       | 142012/488281 [30:16<1:13:48, 78.18it/s]


acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
{'loss': 123.68717956542969, 'l2_loss': 67.21591186523438, 'l1_loss': 56.47126770019531}
acts.shape=torch.Size([4096, 4096])
acts.shape=torch.Size([4096, 4096])
Saved as version 87


KeyboardInterrupt: 