# Testing ⏭ GreedyLR Scheduler for 🤗 Transformers

## 0. Design of Experiments Framework

In [4]:
import itertools

### Step 1: Define Parameters

In [None]:
# Flags held constant for every experiment. Each value is a one-element list
# so that fixed and swept flags can be combined with itertools.product.
fixed_params = {
    'seed': ['42'],
    'do_train': ['True'],
    'save_strategy': ['no'],
    'logging_steps': ['10']
}


# Flags to sweep over; each list holds the candidate values for that flag.
variable_params = {
    'model_name_or_path': ['bigscience/bloom-560m'],
    'dataset_name': ['truthfulqa'],
    'max_steps': ['1000'],
    # 'adamw' is not one of the --optim choices the argument parser reports
    # (see the error text recorded in the NER section); 'adamw_hf' is.
    'optim': ['adamw_hf'],
    # Every run cell in this notebook selects the scheduler as 'greedy'.
    'lr_scheduler_type': ['greedy'],
    'per_device_train_batch_size': ['8'],
    'rank': ['4'],
}

# TODO: Create dictionary to lookup model-specific variable params (e.g. lr, lr_scheduler_type)

# Single mapping of flag name -> candidate values (fixed flags first).
all_params = {**fixed_params, **variable_params}
all_params

### Step 2: Define Individual Experiment

In [None]:
def run_experiment(parameters):
    """Run one `run_qa.py` training job and return its logged training curve.

    Args:
        parameters: Mapping of command-line flag name (without the leading
            ``--``) to its value, e.g. ``{'seed': '42', 'optim': 'sgd'}``.

    Returns:
        A list of ``(step, learning_rate, loss)`` tuples, one per entry of the
        trainer state's ``log_history`` except the last entry, which is
        dropped (presumably the end-of-training summary -- confirm).

    Raises:
        ValueError: From ``get_results`` when ``all_results.json`` is missing
            from the run's output directory.
    """
    # Imported locally so the function does not depend on which notebook cells
    # ran first. Elsewhere in this notebook the module is only bound under the
    # alias `run_squad`, so the bare name `run_qa` would otherwise be undefined.
    import json
    import sys
    from unittest.mock import patch

    import run_qa

    params = " ".join([f"--{k} {v}" for k, v in parameters.items()])

    # TODO: need to abstract out LoRA rank (and other key PEFT config parameters) as an argument for the experiment

    tmp_dir = get_auto_remove_tmp_dir()
    testargs = f"""
    run_qa.py
    {params}
    --output_dir {tmp_dir}
    """.split()

    # Run the example script in-process by substituting its command line.
    with patch.object(sys, "argv", testargs):
        run_qa.main()
        # Called for its check only: raises if all_results.json was not written.
        get_results(tmp_dir)

    # Close the state file deterministically instead of leaking the handle.
    with open(f'{tmp_dir}/trainer_state.json') as state_file:
        all_logs = json.load(state_file)

    loss_metrics = [ (l['step'], l['learning_rate'], l['loss']) for l in all_logs['log_history'][:-1] ]

    return loss_metrics

### Step 3: Loop across all experiments

In [None]:
# Collected as (params, result) pairs. It stays empty while the training call
# below is commented out, but the name must exist for the Step 4 cell.
experiment_results = []
print('List of Experiments:')
# One experiment per element of the Cartesian product of all candidate values.
for values in itertools.product(*all_params.values()):
    params = dict(zip(all_params.keys(), values))
    # TODO Add model-specific parameters to params dict
    print(params)
    print()
    # Dry run only: uncomment the next two lines to actually train.
    # result = run_experiment(params)
    # experiment_results.append((params, result))

### Step 4: Store results

In [None]:
# pandas is not imported by any earlier cell, so bring it in here.
import pandas as pd

# One row per experiment: the parameter dict and the metrics returned for it.
df = pd.DataFrame(experiment_results, columns=['Parameters', 'Results'])
df.head()

In [None]:
# df.to_csv('./greedyLR_experiment_results.csv', index=False)

### Step 5: Analyze & Graph results

In [None]:
#TODO

## 1. Translation

### Need to reinstall from source to register changes

(may need to restart kernel)

In [7]:
# Install the translation example's requirements, then the local source
# checkouts of transformers and peft in editable mode (-e).
%pip install -r translation/requirements.txt
%pip install -e ~/greedylr/transformers/ #Or wherever you downloaded this source 
%pip install -e ~/greedylr/peft/ #Or wherever you downloaded this source 

Collecting accelerate>=0.12.0 (from -r translation/requirements.txt (line 1))
  Obtaining dependency information for accelerate>=0.12.0 from https://files.pythonhosted.org/packages/10/d3/5382aa337d3e67214003a17b06bfc07cf0334356b4e8aaf3b12b0d38c83f/accelerate-0.20.3-py3-none-any.whl.metadata
  Downloading accelerate-0.20.3-py3-none-any.whl.metadata (17 kB)
Collecting datasets>=1.8.0 (from -r translation/requirements.txt (line 2))
  Obtaining dependency information for datasets>=1.8.0 from https://files.pythonhosted.org/packages/12/17/95e48481a826f85c918e0610257de493164096e29fe16ec408c0b862e057/datasets-2.13.1-py3-none-any.whl.metadata
  Downloading datasets-2.13.1-py3-none-any.whl.metadata (20 kB)
Collecting sentencepiece!=0.1.92 (from -r translation/requirements.txt (line 3))
  Using cached sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting sacrebleu>=1.4.12 (from -r translation/requirements.txt (line 5))
  Using cached sacrebleu-2.3.1-py

Add test folders to `sys.path` (the Python import path, not the shell `PATH`)

In [5]:
import sys
import os
# Example-script folders, expressed relative to the current working directory,
# appended to the import path so their scripts can be imported by module name.
SRC_DIRS = list(map(
    lambda folder: os.path.join('./', folder),
    (
        "text-generation",
        "text-classification",
        "token-classification",
        "language-modeling",
        "multiple-choice",
        "question-answering",
        "summarization",
        "translation",
        "image-classification",
        "speech-recognition",
        "audio-classification",
        "speech-pretraining",
        "image-pretraining",
        "semantic-segmentation",
    ),
))
sys.path += SRC_DIRS

In [2]:
from translation import run_translation
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import * 

def get_results(output_dir):
    """Load the metrics stored in ``all_results.json`` under ``output_dir``.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The parsed JSON content of that file.

    Raises:
        ValueError: If the file does not exist.
    """
    results_file = os.path.join(output_dir, "all_results.json")
    if not os.path.exists(results_file):
        raise ValueError(f"can't find {results_file}")
    with open(results_file, "r") as handle:
        return json.load(handle)

In [None]:
# %pip install sacremoses

### Baseline (originally AdamW_HF with the default linear LambdaLR; the cell below currently runs SGD with `constant_with_warmup`)

In [None]:
# Baseline translation run (de -> en, opus100, bigscience/mt0-small) with SGD
# and the constant_with_warmup schedule, logging every 10 steps.
# The scheduler flag is spelled out in full (--lr_scheduler_type), as in every
# other cell, instead of relying on argparse prefix matching of --lr_scheduler.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_translation.py
    --model_name_or_path bigscience/mt0-small
    --source_lang de
    --target_lang en
    --dataset_name opus100
    --dataset_config_name de-en
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=1000
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=4
    --save_strategy no
    --logging_steps 10
    --seed 42
    --optim sgd
    --lr_scheduler_type constant_with_warmup
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_translation.main()
    result = get_results(tmp_dir)
    # print(result["eval_bleu"]>30)

In [None]:
import json

alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d1 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

### With GreedyLR

In [None]:
# GreedyLR translation run: same model, dataset, optimizer, learning rate,
# batch size and seed as the baseline cell, but with --lr_scheduler_type greedy,
# --warmup_steps=0 and the scheduler options min_lr / smooth / patience / factor.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_translation.py
    --model_name_or_path bigscience/mt0-small
    --source_lang de
    --target_lang en
    --dataset_name opus100
    --dataset_config_name de-en
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=1000
    --warmup_steps=0
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=4
    --lr_scheduler_type greedy
    --save_strategy no
    --logging_steps 10
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --seed 42
    --optim sgd
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_translation.main()
    result = get_results(tmp_dir)
    # print(result["eval_bleu"]>30)

In [None]:
alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
# Learning rate vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs');

In [None]:
# Loss vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-',label='baseline')
line2, = ax.plot(x2, y2, 'r-',label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss');

In [None]:
# Last kept loss of the baseline minus that of GreedyLR (rows are
# (step, learning_rate, loss)); positive means GreedyLR ended with the lower loss.
d1[-1][-1] - d2[-1][-1]

### With GreedyLR + PEFT

Notes
- bigscience/mt0-large: runs into OOM error (TBD: is this unusual for a g5.16x instance?)


In [None]:
# TEST
# from sagemaker.remote_function import remote
# @remote(instance_type="ml.m5.2xlarge", dependencies='./translation/requirements.txt')
# def divide(x, y):
#     return x / y

# divide(2, 3.0)

In [6]:
# from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
# TODO: submit lora_config parameters as arguments to Hf Trainer

# lora_config = LoraConfig(
#         task_type=TaskType.SEQ_2_SEQ_LM, 
#         inference_mode=False, 
#         r=8, 
#         lora_alpha=32, 
#         lora_dropout=0.1
#     )

In [None]:
# GreedyLR run intended for the PEFT comparison.
# NOTE(review): these arguments are identical to the plain GreedyLR translation
# run above; no PEFT/LoRA option is passed here (the LoraConfig cell above is
# commented out) -- confirm PEFT is actually enabled for this run.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_translation.py
    --model_name_or_path bigscience/mt0-small
    --source_lang de
    --target_lang en
    --dataset_name opus100
    --dataset_config_name de-en
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=1000
    --warmup_steps=0
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=4
    --lr_scheduler_type greedy
    --save_strategy no
    --logging_steps 10
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --seed 42
    --optim sgd
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_translation.main()
    result = get_results(tmp_dir)
    # print(result["eval_bleu"]>30)

In [None]:
# alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
# d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

In [None]:
# Delete the output directory of the run above.
!rm -r {tmp_dir}

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
# Learning rate vs. step, labelled as the PEFT comparison.
# NOTE(review): the cell that would reload d2 after the PEFT run is commented
# out and d1 is not recomputed in this section, so d1/d2 here are whatever the
# earlier (non-PEFT) translation cells left behind -- confirm before trusting
# the '+ PEFT' labels.
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-', label='baseline + PEFT')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR + PEFT')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs');

In [None]:
# Loss vs. step, labelled as the PEFT comparison.
# NOTE(review): the cell that would reload d2 after the PEFT run is commented
# out and d1 is not recomputed in this section, so d1/d2 here are whatever the
# earlier (non-PEFT) translation cells left behind -- confirm before trusting
# the '+ PEFT' labels.
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-',label='baseline + PEFT')
line2, = ax.plot(x2, y2, 'r-',label='GreedyLR + PEFT')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss');

In [None]:
# Last kept loss of d1 minus that of d2 (rows are (step, learning_rate, loss)).
# NOTE(review): d1/d2 are not reloaded in the PEFT section -- confirm they
# hold PEFT results before interpreting this number.
d1[-1][-1] - d2[-1][-1]

## 2. Question Answering

In [None]:
import run_qa as run_squad
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import *

def get_results(output_dir):
    """Load the metrics stored in ``all_results.json`` under ``output_dir``.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The parsed JSON content of that file.

    Raises:
        ValueError: If the file does not exist.
    """
    results_file = os.path.join(output_dir, "all_results.json")
    if not os.path.exists(results_file):
        raise ValueError(f"can't find {results_file}")
    with open(results_file, "r") as handle:
        return json.load(handle)

### Baseline (originally AdamW_HF / Adafactor; the cell below currently runs SGD with `constant_with_warmup`)

In [None]:
# Baseline question-answering run (xlm-roberta-base on squad) with SGD and the
# constant_with_warmup schedule.
# NOTE(review): this run uses train/eval batch sizes 2/1 while the GreedyLR run
# below uses 4/2, so the two curves differ in more than the scheduler -- confirm
# this is intended.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_qa.py
    --model_name_or_path xlm-roberta-base
    --dataset_name squad
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=5000
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=2
    --per_device_eval_batch_size=1
    --save_strategy no
    --logging_steps 10
    --seed 42
    --optim sgd
    --lr_scheduler_type constant_with_warmup
""".split()

    # Alternative settings tried for this baseline (not active):
    # --lr_scheduler_type cosine_with_restarts
    # --optim adafactor

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_squad.main()
    result = get_results(tmp_dir)

In [None]:
import json

alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d1 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

### Greedy LR

In [None]:
# GreedyLR question-answering run (xlm-roberta-base on squad, SGD) with
# --warmup_steps=0 and the scheduler options min_lr / smooth / patience / factor.
# NOTE(review): train/eval batch sizes here are 4/2, whereas the baseline cell
# above uses 2/1 -- confirm the mismatch is intended.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_qa.py
    --model_name_or_path xlm-roberta-base
    --dataset_name squad
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=5000
    --warmup_steps=0
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=4
    --per_device_eval_batch_size=2
    --lr_scheduler_type greedy
    --save_strategy no
    --logging_steps 10
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --seed 42
    --optim sgd
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_squad.main()
    result = get_results(tmp_dir)

In [None]:
alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Learning rate vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs')
fig.set_figheight(4)

In [None]:
# Loss vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss')
fig.set_figheight(4)

In [None]:
# Last kept loss of the baseline minus that of GreedyLR (rows are
# (step, learning_rate, loss)); positive means GreedyLR ended with the lower loss.
d1[-1][-1] - d2[-1][-1]

## Image Classification

In [None]:
# Install torchvision before importing run_image_classification below.
%pip install torchvision

In [None]:
import run_image_classification
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import *

def get_results(output_dir):
    """Load the metrics stored in ``all_results.json`` under ``output_dir``.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The parsed JSON content of that file.

    Raises:
        ValueError: If the file does not exist.
    """
    results_file = os.path.join(output_dir, "all_results.json")
    if not os.path.exists(results_file):
        raise ValueError(f"can't find {results_file}")
    with open(results_file, "r") as handle:
        return json.load(handle)

In [None]:
# Baseline image-classification run (google/vit-base-patch16-224 on
# Maysee/tiny-imagenet) with SGD and the constant_with_warmup schedule.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_image_classification.py
    --output_dir {tmp_dir}
    --model_name_or_path google/vit-base-patch16-224
    --dataset_name Maysee/tiny-imagenet
    --do_train
    --learning_rate 1e-4
    --per_device_train_batch_size 64
    --remove_unused_columns False
    --overwrite_output_dir True
    --metric_for_best_model accuracy
    --max_steps 1000
    --train_val_split 0.2
    --save_strategy no
    --logging_steps 10
    --ignore_mismatched_sizes True
    --seed 42
    --optim sgd
    --lr_scheduler_type constant_with_warmup
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_image_classification.main()
    result = get_results(tmp_dir)


In [None]:
import json

alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d1 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

!rm -r pymp*
!rm -r ./tmp*

In [None]:
# GreedyLR image-classification run: same model, dataset, optimizer, learning
# rate and batch size as the baseline cell, with --lr_scheduler_type greedy and
# the scheduler options min_lr / smooth / patience / factor.
# --seed 42 is passed explicitly to match the baseline, and the repeated
# --logging_steps flag has been removed.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_image_classification.py
    --output_dir {tmp_dir}
    --model_name_or_path google/vit-base-patch16-224
    --dataset_name Maysee/tiny-imagenet
    --do_train
    --learning_rate 1e-4
    --per_device_train_batch_size 64
    --remove_unused_columns False
    --overwrite_output_dir True
    --metric_for_best_model accuracy
    --max_steps 1000
    --train_val_split 0.2
    --save_strategy no
    --logging_steps 10
    --ignore_mismatched_sizes True
    --seed 42
    --lr_scheduler_type greedy
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --optim sgd
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_image_classification.main()
    result = get_results(tmp_dir)

In [None]:
alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Learning rate vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs')
fig.set_figheight(4)

In [None]:
# Loss vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss')
fig.set_figheight(4)

In [None]:
# Last kept loss of the baseline minus that of GreedyLR (rows are
# (step, learning_rate, loss)); positive means GreedyLR ended with the lower loss.
d1[-1][-1] - d2[-1][-1]

## Semantic Segmentation

In [None]:
# Install the semantic-segmentation example's requirements.
%pip install -r semantic-segmentation/requirements.txt

In [None]:
import run_semantic_segmentation
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import *

def get_results(output_dir):
    """Load the metrics stored in ``all_results.json`` under ``output_dir``.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The parsed JSON content of that file.

    Raises:
        ValueError: If the file does not exist.
    """
    results_file = os.path.join(output_dir, "all_results.json")
    if not os.path.exists(results_file):
        raise ValueError(f"can't find {results_file}")
    with open(results_file, "r") as handle:
        return json.load(handle)

In [None]:
# Baseline semantic-segmentation run (nvidia/mit-b0 on
# segments/sidewalk-semantic) with SGD and the constant_with_warmup schedule.
# --seed 42 is passed explicitly so this run is pinned to the same seed as the
# GreedyLR run it is compared against.
# NOTE(review): this run uses batch size 2 and --dataset_config mini, while the
# GreedyLR run uses batch size 4 and no --dataset_config -- confirm whether the
# two runs are meant to differ in these settings.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_semantic_segmentation.py
    --output_dir {tmp_dir}
    --model_name_or_path nvidia/mit-b0
    --dataset_name segments/sidewalk-semantic
    --dataset_config mini
    --do_train
    --remove_unused_columns False
    --overwrite_output_dir True
    --max_steps 1000
    --learning_rate=1e-3
    --per_device_train_batch_size=2
    --save_strategy no
    --logging_steps 10
    --seed 42
    --optim sgd
    --lr_scheduler_type constant_with_warmup
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_semantic_segmentation.main()
    result = get_results(tmp_dir)

In [None]:
import json

alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d1 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

!rm -r pymp*
!rm -r ./tmp*

In [None]:
# GreedyLR semantic-segmentation run (nvidia/mit-b0 on
# segments/sidewalk-semantic, SGD) with the scheduler options
# min_lr / smooth / patience / factor. The repeated --logging_steps flag has
# been removed.
# NOTE(review): batch size is 4 here versus 2 in the baseline cell, and the
# baseline additionally passes --dataset_config mini -- confirm whether the two
# runs are meant to differ in these settings.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_semantic_segmentation.py
    --output_dir {tmp_dir}
    --model_name_or_path nvidia/mit-b0
    --dataset_name segments/sidewalk-semantic
    --do_train
    --remove_unused_columns False
    --overwrite_output_dir True
    --max_steps 1000
    --learning_rate=1e-3
    --per_device_train_batch_size=4
    --save_strategy no
    --logging_steps 10
    --seed 42
    --lr_scheduler_type greedy
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --optim sgd
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_semantic_segmentation.main()
    result = get_results(tmp_dir)

In [None]:
alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Learning rate vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs')
fig.set_figheight(4)

In [None]:
# Loss vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss')
fig.set_figheight(4)

In [None]:
# Last kept loss of the baseline minus that of GreedyLR (rows are
# (step, learning_rate, loss)); positive means GreedyLR ended with the lower loss.
d1[-1][-1] - d2[-1][-1]

## Summarization

In [None]:
# Install rouge-score using pip's legacy dependency resolver.
%pip install --use-deprecated=legacy-resolver rouge-score

In [None]:
# %pip --no-cache-dir install -r summarization/requirements.txt

In [None]:
%pip install absl-py nltk numpy six>=1.14

In [None]:
# Set pip itself to version 21.0.0.
%pip install --upgrade pip==21.0.0

In [None]:
# Install rouge-score without using pip's cache.
%pip install --no-cache-dir rouge-score

In [None]:
import run_summarization
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import *

def get_results(output_dir):
    """Load the metrics stored in ``all_results.json`` under ``output_dir``.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The parsed JSON content of that file.

    Raises:
        ValueError: If the file does not exist.
    """
    results_file = os.path.join(output_dir, "all_results.json")
    if not os.path.exists(results_file):
        raise ValueError(f"can't find {results_file}")
    with open(results_file, "r") as handle:
        return json.load(handle)

In [None]:
# Baseline summarization run (facebook/bart-base on amazon_reviews_multi) with
# adagrad, fp16 and the polynomial schedule.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_summarization.py
    --model_name_or_path facebook/bart-base
    --dataset_name amazon_reviews_multi
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=1000
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=8
    --predict_with_generate
    --save_strategy no
    --logging_steps 10
    --seed 42
    --fp16 true
    --optim adagrad
    --lr_scheduler_type polynomial
""".split()

# Added after the split so the prefix, which ends in a space, stays a single
# argv token.
testargs.extend(['--source_prefix', "summarize: "])

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_summarization.main()
    result = get_results(tmp_dir)

In [None]:
import json

alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d1 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

!rm -r pymp*
!rm -r ./tmp*

In [None]:
# %store -r d1

In [None]:
# GreedyLR summarization run: same model, dataset, optimizer, learning rate,
# batch size, seed and fp16 setting as the baseline cell, with
# --lr_scheduler_type greedy and the scheduler options
# min_lr / smooth / patience / factor. The repeated --logging_steps flag has
# been removed.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_summarization.py
    --model_name_or_path facebook/bart-base
    --dataset_name amazon_reviews_multi
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --max_steps=1000
    --do_train
    --learning_rate=1e-4
    --per_device_train_batch_size=8
    --predict_with_generate
    --save_strategy no
    --logging_steps 10
    --seed 42
    --lr_scheduler_type greedy
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --fp16 true
    --optim adagrad
""".split()

# Added after the split so the prefix, which ends in a space, stays a single
# argv token.
testargs.append('--source_prefix')
testargs.append("summarize: ")

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_summarization.main()
    result = get_results(tmp_dir)

In [None]:
alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

In [None]:
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Learning rate vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs');

In [None]:
# Loss vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss');

In [None]:
# Last kept loss of the baseline minus that of GreedyLR (rows are
# (step, learning_rate, loss)); positive means GreedyLR ended with the lower loss.
d1[-1][-1] - d2[-1][-1]


## Audio: speech pretraining (wav2vec2)

In [None]:
import run_wav2vec2_pretraining_no_trainer
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import *

In [None]:
# Install the speech-pretraining example's requirements.
# NOTE(review): this cell comes after the cell that imports
# run_wav2vec2_pretraining_no_trainer -- confirm the intended run order.
%pip install -r speech-pretraining/requirements.txt

In [None]:
# Smoke-test the wav2vec2 pretraining script for two training steps
# (--max_train_steps 2), then check that a model can be reloaded from the
# output directory.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_wav2vec2_pretraining_no_trainer.py
    --output_dir {tmp_dir}
    --model_name_or_path hf-internal-testing/tiny-random-wav2vec2
    --dataset_name librispeech_asr
    --dataset_config_names clean
    --dataset_split_names validation
    --learning_rate 1e-4
    --per_device_train_batch_size 4
    --per_device_eval_batch_size 4
    --preprocessing_num_workers 16
    --max_train_steps 2
    --validation_split_percentage 5
    --seed 42
""".split()


# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_wav2vec2_pretraining_no_trainer.main()
    model = Wav2Vec2ForPreTraining.from_pretrained(tmp_dir)
    # Plain assert: a notebook cell has no TestCase instance, so the original
    # `self.assertIsNotNone(model)` raised NameError on `self`.
    assert model is not None

## NER

In [None]:
import run_ner
import argparse
import json
import logging
import os
import sys
from unittest.mock import patch

import torch

from transformers import ViTMAEForPreTraining, Wav2Vec2ForPreTraining
from transformers.testing_utils import CaptureLogger, TestCasePlus, get_gpu_count, slow, torch_device
from transformers.utils import is_apex_available
from utils import *

In [None]:
# %pip install -r token-classification/requirements.txt

In [None]:
# Print the output of nvidia-smi before launching the NER runs.
!nvidia-smi

In [None]:
# Baseline NER run (camembert/camembert-large on xglue, config ner) with SGD,
# fp16 and the constant_with_warmup schedule.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_ner.py
    --model_name_or_path camembert/camembert-large
    --dataset_name xglue
    --dataset_config ner
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --do_train
    --max_steps=1000
    --learning_rate=1e-4
    --per_device_train_batch_size=16
    --save_strategy no
    --seed 42
    --logging_steps 10
    --fp16 true
    --optim sgd
    --lr_scheduler_type constant_with_warmup
""".split()


# GreedyLR options kept for reference (not active in this baseline run):
#     --lr_scheduler_type greedy
#     --logging_steps 10
#     --min_lr=1e-5
#     --smooth True
#     --patience 10
#     --factor 0.95

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_ner.main()
    result = get_results(tmp_dir)
    
    
    # --optim: invalid choice: 'as' (choose from 'adamw_hf', 'adamw_torch', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad')

In [None]:
import json

alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d1 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

!rm -r pymp*
!rm -r ./tmp*

In [None]:
# GreedyLR NER run: same model, dataset, optimizer, learning rate, batch size,
# seed and fp16 setting as the baseline cell, with --lr_scheduler_type greedy
# and the scheduler options min_lr / smooth / patience / factor.
# The repeated --logging_steps and --fp16 flags have been removed.
tmp_dir = get_auto_remove_tmp_dir()
testargs = f"""
    run_ner.py
    --model_name_or_path camembert/camembert-large
    --dataset_name xglue
    --dataset_config ner
    --output_dir {tmp_dir}
    --overwrite_output_dir
    --do_train
    --max_steps=1000
    --learning_rate=1e-4
    --per_device_train_batch_size=16
    --save_strategy no
    --seed 42
    --logging_steps 10
    --fp16 true
    --lr_scheduler_type greedy
    --min_lr=1e-5
    --smooth True
    --patience 10
    --factor 0.95
    --optim sgd
""".split()

# Run the example script in-process by substituting its command line.
with patch.object(sys, "argv", testargs):
    run_ner.main()
    result = get_results(tmp_dir)

In [None]:
alllogs = json.load(open(f'{tmp_dir}/trainer_state.json'))
d2 = [ (l['step'], l['learning_rate'], l['loss']) for l in alllogs['log_history'][:-1] ]

!rm -r {tmp_dir}

%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Learning rate vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 1]].T
x2, y2 = np.array(d2)[:, [0, 1]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('learning rate')
ax.legend()
ax.set_title('LRs');

In [None]:
# Loss vs. step for the baseline (d1) and GreedyLR (d2) runs.
# Rows of d1/d2 are (step, learning_rate, loss).
x1, y1 = np.array(d1)[:, [0, 2]].T
x2, y2 = np.array(d2)[:, [0, 2]].T

fig, ax = plt.subplots()
fig.set_figheight(4)
line1, = ax.plot(x1, y1, 'b-', label='baseline')
line2, = ax.plot(x2, y2, 'r-', label='GreedyLR')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('step')
ax.set_ylabel('loss')
ax.legend()
ax.set_title('Loss');

In [None]:
# Earlier, disabled comparisons:
# print(d1[-1][int(0.1*len(d1[-1]))-1] > d2[-1][int(0.1*len(d2[-1]))-1])
# print(d1[-1][int(0.5*len(d1[-1]))-1] > d2[-1][int(0.5*len(d2[-1]))-1])
# print(d1[-1][int(1*len(d1[-1]))-1] > d2[-1][int(1*len(d2[-1]))-1])


# Last kept loss of the baseline minus that of GreedyLR (rows are
# (step, learning_rate, loss)); positive means GreedyLR ended with the lower loss.
d1[-1][-1] - d2[-1][-1]

In [None]:
import sys
import os
# Example-script folders, relative to the current working directory.
SRC_DIRS = [
    os.path.join('./', dirname) for dirname in [
        "text-generation",
        "text-classification",
        "token-classification",
        "language-modeling",
        "multiple-choice",
        "question-answering",
        "summarization",
        "translation",
        "image-classification",
        "speech-recognition",
        "audio-classification",
        "speech-pretraining",
        "image-pretraining",
        "semantic-segmentation",
    ]
]
# The same folders are already added by a cell near the top of the notebook;
# only append the ones that are missing so re-running does not pile up
# duplicate sys.path entries.
for src_dir in SRC_DIRS:
    if src_dir not in sys.path:
        sys.path.append(src_dir)