# Getting started with our ultimate beginner guide!

This tutorial will walk you through the basics of using the `usb` lighting package (imported below as `semilearn`). Let's get started by training a FixMatch model on CIFAR-10!

In [5]:
import os
import sys

# Put the parent directory on the module search path before importing semilearn
# (presumably so the package resolves from a repository checkout; confirm for your layout).
sys.path.append('../')

from semilearn import get_dataset, get_data_loader, get_net_builder, get_algorithm, get_config, Trainer


## Step 1: define the settings and create the config

In [6]:
# Dataset root that is later passed to `get_dataset` as `data_dir`.
# Set the USB_DATA_DIR environment variable to point somewhere else; when it is
# unset, the original location is used so existing setups keep working.
DATA_DIR = os.environ.get('USB_DATA_DIR', '/data/datasets')

# Raw settings as a plain dict. It has its own name so that `config` only ever
# refers to the object returned by `get_config` (attribute access, e.g. `config.net`).
config_dict = {
    'algorithm': 'fixmatch',
    'net': 'vit_tiny_patch2_32',
    'use_pretrain': True,
    # checkpoint URL; the recorded run downloads it when the algorithm is created
    'pretrain_path': 'https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth',

    # optimization configs
    'epoch': 1,
    'num_train_iter': 5000,
    'num_eval_iter': 500,   # the recorded run evaluates and saves at iterations 500, 1000, ...
    'num_log_iter': 50,     # the recorded run logs training metrics every 50 iterations
    'optim': 'AdamW',
    'lr': 5e-4,
    'layer_decay': 0.5,
    'batch_size': 16,       # batch size of the labeled loader
    'eval_batch_size': 16,  # batch size of the eval loader

    # dataset configs
    'dataset': 'cifar10',
    'num_labels': 40,       # the recorded run reports 4 labeled samples for each of the 10 classes
    'num_classes': 10,
    'img_size': 32,
    'crop_ratio': 0.875,
    'data_dir': DATA_DIR,
    'ulb_samples_per_class': None,

    # algorithm specific configs
    'hard_label': True,
    'uratio': 2,            # unlabeled batch size = int(batch_size * uratio) when the loader is built
    'ulb_loss_ratio': 1.0,

    # device configs
    'gpu': 0,
    'world_size': 1,
    'distributed': False,
    'num_workers': 2,
}

# Later cells read settings as attributes of this object (config.net, config.data_dir, ...).
# NOTE(review): `config.include_lb_to_ulb` is read later but not set here, so it
# presumably comes from defaults filled in by `get_config` - confirm.
config = get_config(config_dict)

## Step 2: create model and specify algorithm

In [8]:
# Create the algorithm object from the config and a network builder for `config.net`.
# Per the recorded output of this cell, this call also loads the dataset, builds
# the train/eval data loaders, downloads the pretrained checkpoint, and creates
# the optimizer and scheduler. TensorBoard logging and the logger are disabled here.
algorithm = get_algorithm(
    config,
    get_net_builder(config.net, from_name=False),
    tb_log=None,
    logger=None,
)

Files already downloaded and verified
lb count: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
ulb count: [5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]
Files already downloaded and verified
unlabeled data number: 50000, labeled data number 40
Create train and test data loaders
[!] data loader keys: dict_keys(['train_lb', 'train_ulb', 'eval'])


Downloading: "https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth" to /home/y_yin/.cache/torch/hub/checkpoints/vit_tiny_patch2_32_mlp_im_1k_32.pth
100%|██████████| 115M/115M [00:01<00:00, 74.8MB/s] 


_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])
Create optimizer and scheduler


## Step 3: create dataset

In [9]:
# Build the dataset splits; the result is indexed below by 'train_lb', 'train_ulb' and 'eval'.
# NOTE(review): `config.include_lb_to_ulb` is not among the keys set in the settings
# dict, so it presumably comes from defaults filled in by `get_config` - confirm.
dataset_dict = get_dataset(
    config,
    config.algorithm,
    config.dataset,
    config.num_labels,
    config.num_classes,
    data_dir=config.data_dir,
    include_lb_to_ulb=config.include_lb_to_ulb,
)

# Labeled training loader.
train_lb_loader = get_data_loader(
    config,
    dataset_dict['train_lb'],
    config.batch_size,
)

# Unlabeled training loader: its batch size is the labeled batch size scaled by `uratio`.
train_ulb_loader = get_data_loader(
    config,
    dataset_dict['train_ulb'],
    int(config.uratio * config.batch_size),
)

# Evaluation loader.
eval_loader = get_data_loader(
    config,
    dataset_dict['eval'],
    config.eval_batch_size,
)

Files already downloaded and verified
lb count: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
ulb count: [5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]
Files already downloaded and verified


## Step 4: train

In [10]:
# Wrap the config and the algorithm in a Trainer; `trainer` is reused by the
# evaluate and predict cells.
trainer = Trainer(config, algorithm)

# Run training with the labeled, unlabeled and eval loaders. In the recorded run
# this prints training metrics and confusion matrices, and writes checkpoints
# under ./saved_models/fixmatch/.
trainer.fit(
    train_lb_loader,
    train_ulb_loader,
    eval_loader,
)

Epoch: 0
50 iteration USE_EMA: True, train/sup_loss: 2.3363, train/unsup_loss: 0.0000, train/total_loss: 2.3363, train/util_ratio: 0.0000, train/run_time: 0.2223, lr: 0.0005, train/prefetch_time: 0.0021 
100 iteration USE_EMA: True, train/sup_loss: 1.2222, train/unsup_loss: 0.0000, train/total_loss: 1.2222, train/util_ratio: 0.0000, train/run_time: 0.1553, lr: 0.0005, train/prefetch_time: 0.0024 
150 iteration USE_EMA: True, train/sup_loss: 0.5054, train/unsup_loss: 0.0000, train/total_loss: 0.5054, train/util_ratio: 0.0000, train/run_time: 0.2114, lr: 0.0005, train/prefetch_time: 0.0024 
200 iteration USE_EMA: True, train/sup_loss: 0.3123, train/unsup_loss: 0.0000, train/total_loss: 0.3123, train/util_ratio: 0.0000, train/run_time: 0.2250, lr: 0.0005, train/prefetch_time: 0.0022 
250 iteration USE_EMA: True, train/sup_loss: 0.3571, train/unsup_loss: 0.0000, train/total_loss: 0.3571, train/util_ratio: 0.0000, train/run_time: 0.2235, lr: 0.0005, train/prefetch_time: 0.0025 
300 iteratio

  _warn_prf(average, modifier, msg_start, len(result))


confusion matrix:
[[0.    0.    0.    0.    0.    0.88  0.    0.12  0.    0.   ]
 [0.    0.    0.    0.    0.    1.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.979 0.    0.021 0.    0.   ]
 [0.    0.    0.    0.    0.    0.985 0.    0.015 0.    0.   ]
 [0.    0.    0.    0.    0.    0.965 0.    0.035 0.    0.   ]
 [0.    0.    0.    0.    0.    0.996 0.    0.004 0.    0.   ]
 [0.    0.    0.    0.    0.    0.993 0.    0.007 0.    0.   ]
 [0.    0.    0.    0.    0.    0.494 0.    0.506 0.    0.   ]
 [0.    0.    0.    0.    0.    0.979 0.    0.021 0.    0.   ]
 [0.    0.    0.    0.    0.    0.986 0.    0.014 0.    0.   ]]
model saved: ./saved_models/fixmatch/latest_model.pth
model saved: ./saved_models/fixmatch/model_best.pth
500 iteration, USE_EMA: True, train/sup_loss: 0.0190, train/unsup_loss: 0.0228, train/total_loss: 0.0419, train/util_ratio: 0.1562, train/run_time: 0.1993, eval/loss: 6.1640, eval/top-1-acc: 0.1502, eval/balanced_acc: 0.1502, eval/precision: 0.0

  _warn_prf(average, modifier, msg_start, len(result))


confusion matrix:
[[0.001 0.    0.    0.    0.    0.464 0.    0.467 0.068 0.   ]
 [0.    0.151 0.    0.    0.    0.773 0.    0.015 0.061 0.   ]
 [0.    0.    0.019 0.    0.    0.791 0.001 0.189 0.    0.   ]
 [0.    0.    0.    0.    0.    0.884 0.001 0.113 0.002 0.   ]
 [0.    0.    0.    0.    0.    0.355 0.    0.645 0.    0.   ]
 [0.    0.    0.    0.    0.    0.954 0.    0.046 0.    0.   ]
 [0.    0.    0.001 0.    0.    0.421 0.551 0.025 0.002 0.   ]
 [0.    0.    0.    0.    0.    0.075 0.    0.925 0.    0.   ]
 [0.    0.    0.    0.    0.    0.528 0.    0.085 0.387 0.   ]
 [0.    0.002 0.    0.    0.    0.749 0.    0.079 0.159 0.011]]
model saved: ./saved_models/fixmatch/latest_model.pth
model saved: ./saved_models/fixmatch/model_best.pth
1000 iteration, USE_EMA: True, train/sup_loss: 0.0079, train/unsup_loss: 0.4744, train/total_loss: 0.4822, train/util_ratio: 0.7188, train/run_time: 0.2148, eval/loss: 2.5447, eval/top-1-acc: 0.2999, eval/balanced_acc: 0.2999, eval/precision: 0.

[2023-10-24 00:57:11,708 INFO] confusion matrix
[2023-10-24 00:57:11,709 INFO] [[0.97  0.001 0.001 0.001 0.    0.    0.001 0.    0.016 0.01 ]
 [0.    0.986 0.    0.    0.    0.    0.    0.    0.    0.014]
 [0.043 0.    0.909 0.004 0.021 0.009 0.01  0.004 0.    0.   ]
 [0.003 0.002 0.005 0.908 0.009 0.049 0.019 0.002 0.002 0.001]
 [0.    0.    0.005 0.006 0.949 0.    0.01  0.029 0.001 0.   ]
 [0.002 0.    0.009 0.049 0.007 0.91  0.002 0.021 0.    0.   ]
 [0.003 0.    0.003 0.002 0.    0.001 0.989 0.    0.001 0.001]
 [0.008 0.    0.008 0.003 0.007 0.025 0.    0.948 0.    0.001]
 [0.013 0.005 0.001 0.    0.    0.    0.    0.    0.978 0.003]
 [0.007 0.023 0.    0.    0.    0.    0.    0.    0.003 0.967]]
[2023-10-24 00:57:11,710 INFO] evaluation metric
[2023-10-24 00:57:11,710 INFO] acc: 0.9514
[2023-10-24 00:57:11,711 INFO] precision: 0.9515
[2023-10-24 00:57:11,711 INFO] recall: 0.9514
[2023-10-24 00:57:11,711 INFO] f1: 0.9513
[2023-10-24 00:57:11,981 INFO] Best acc 0.9514 at epoch 0
[20

model saved: ./saved_models/fixmatch/latest_model.pth
model saved: ./saved_models/fixmatch/model_best.pth


## Step 5: evaluate

In [11]:
# Evaluate the trained model on the eval loader. The call is the last expression
# in the cell, so whatever it returns is shown as the cell output
# (return type not visible here - presumably the evaluation metrics; confirm).
trainer.evaluate(eval_loader)

## Step 6: predict

In [7]:
# Run prediction over the eval loader and unpack the two returned values.
# Presumably `y_pred` holds predicted class labels and `y_logits` the raw model
# outputs - confirm against the Trainer implementation.
y_pred, y_logits = trainer.predict(eval_loader)