In [1]:
from tensorflow.keras.datasets import cifar10
from scipy import ndimage
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import regularizers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import to_categorical

In [2]:
# code modified from  https://github.com/p-lambda/gradual_domain_adaptation

# helper functions
def get_preprocessed_cifar10():
    (train_x, train_y), (test_x, test_y) = cifar10.load_data()
    train_x, test_x = train_x / 255.0, test_x / 255.0
    train_x, train_y = shuffle(train_x, train_y)
    train_y = to_categorical(train_y)
    test_y = to_categorical(test_y)
    #train_x = np.expand_dims(np.array(train_x), axis=-1)
    #test_x = np.expand_dims(np.array(test_x), axis=-1)
    return (train_x, train_y), (test_x, test_y)

def continually_rotate_images(xs, start_angle, end_angle):
    new_xs = []
    num_points = xs.shape[0]
    for i in range(num_points):
        angle = float(end_angle - start_angle) / num_points * i + start_angle
        img = ndimage.rotate(xs[i], angle, reshape=False)
        new_xs.append(img)
    return np.array(new_xs)

def sample_rotate_images(xs, start_angle, end_angle):
    new_xs = []
    num_points = xs.shape[0]
    for i in range(num_points):
        if start_angle == end_angle:
            angle = start_angle
        else:
            angle = np.random.uniform(low=start_angle, high=end_angle)
        img = ndimage.rotate(xs[i], angle, reshape=False)
        new_xs.append(img)
    return np.array(new_xs)

def _transition_rotation_dataset(train_x, train_y, test_x, test_y,
                                 source_angles, target_angles, inter_func,
                                 src_train_end, src_val_end, inter_end, target_end):
    assert(target_end <= train_x.shape[0])
    assert(train_x.shape[0] == train_y.shape[0])
    src_tr_x, src_tr_y = train_x[:src_train_end], train_y[:src_train_end]
    src_tr_x = sample_rotate_images(src_tr_x, source_angles[0], source_angles[1])
    src_val_x, src_val_y = train_x[src_train_end:src_val_end], train_y[src_train_end:src_val_end]
    src_val_x = sample_rotate_images(src_val_x, source_angles[0], source_angles[1])
    tmp_inter_x, inter_y = train_x[src_val_end:inter_end], train_y[src_val_end:inter_end]
    inter_x = inter_func(tmp_inter_x)
    dir_inter_x = sample_rotate_images(tmp_inter_x, target_angles[0], target_angles[1])
    dir_inter_y = np.array(inter_y)
    # dir is the "intermediate" samples directly shifted to the target angles (as comparison)
    assert(inter_x.shape == dir_inter_x.shape)
    trg_val_x, trg_val_y = train_x[inter_end:target_end], train_y[inter_end:target_end]
    trg_val_x = sample_rotate_images(trg_val_x, target_angles[0], target_angles[1])
    trg_test_x, trg_test_y = test_x, test_y
    trg_test_x = sample_rotate_images(trg_test_x, target_angles[0], target_angles[1])
    return (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x, inter_y,
            dir_inter_x, dir_inter_y, trg_val_x, trg_val_y, trg_test_x, trg_test_y)
    

def make_rotated_dataset(train_x, train_y, test_x, test_y,
                         source_angles, inter_angles, target_angles,
                         src_train_end, src_val_end, inter_end, target_end):
    inter_func = lambda x: continually_rotate_images(x, inter_angles[0], inter_angles[1])
    return _transition_rotation_dataset(
        train_x, train_y, test_x, test_y, source_angles, target_angles,
        inter_func, src_train_end, src_val_end, inter_end, target_end)
    
def tweak_one_shift(x, y, rho):
    # assign class "3" probability rho - i started with 0 but realized that doesn't composite well w rotation...
    # since 0 rotated is still 0...
    # evenly distribute among the other classes
    # y is categorical, first reverse to numerical
    y_num = np.argmax(y, axis=1)
    x_res_list = []
    y_res_list = []
    n_total = x.shape[0]
    n_classes = 10
    n_others = int((1-rho)*n_total/(n_classes-1))
    n_chosen = n_total - n_others*(n_classes-1)
    # sample from zero class
    zero_idxs = np.argwhere(y_num == 7)[:, 0]
    zero_chosen = np.random.choice(zero_idxs, n_chosen)
    x_res_list.append(x[zero_chosen])
    y_res_list.append(y[zero_chosen])
    for i in [0,1,2,3,4,5,6,8,9]:
        cur_idxs = np.argwhere(y_num == i)[:,0]
        cur_selected = np.random.choice(cur_idxs, n_others)
        x_res_list.append(x[cur_selected])
        y_res_list.append(y[cur_selected])
    x_res = np.concatenate(x_res_list,axis=0)
    y_res = np.concatenate(y_res_list,axis=0)
    # shuffle
    shuffled_idx = np.arange(x_res.shape[0])
    np.random.shuffle(shuffled_idx)
    x_res_shuffled = x_res[shuffled_idx]
    y_res_shuffled = y_res[shuffled_idx]
    return x_res_shuffled, y_res_shuffled

def resample_class_dist(x, y, p_vec): # the p_vec could be e.g. drawn from Dirichlet dist.
    n_total = x.shape[0]
    y_ordinal = np.argmax(y,axis=1)
    px_counts = p_vec * n_total
    x_res_list = []
    y_res_list = []
    for i in range(10):
        cur_idxs = np.argwhere(y_ordinal == i)[:,0]
        #print(cur_idxs)
        if len(cur_idxs) == 0:
            continue
        cur_selected = np.random.choice(cur_idxs, int(px_counts[i]))
        x_res_list.append(x[cur_selected])
        y_res_list.append(y[cur_selected])
    x_res = np.concatenate(x_res_list,axis=0)
    y_res = np.concatenate(y_res_list,axis=0)
    # shuffle
    shuffled_idx = np.arange(x_res.shape[0])
    np.random.shuffle(shuffled_idx)
    x_res_shuffled = x_res[shuffled_idx]
    y_res_shuffled = y_res[shuffled_idx]
    return x_res_shuffled, y_res_shuffled

def rotated_cifar10_60_data_func_nols():
    (train_x, train_y), (test_x, test_y) = get_preprocessed_cifar10()
    return make_rotated_dataset(
        train_x, train_y, test_x, test_y, [0.0, 2.0], [2.0, 20.0], [18.0, 20.0],
        25000, 26000, 48000, 50000)

'''
def rotated_cifar10_60_data_func_tweakone(interval, target_rho, source_rho = 0.1):
    # interval is the granularity of label shift (change rho of shift per interval)
    (train_x, train_y), (test_x, test_y) = get_preprocessed_cifar10()
    (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x, inter_y,
     dir_inter_x, dir_inter_y, trg_val_x, trg_val_y, trg_test_x, trg_test_y) = make_rotated_dataset(
        train_x, train_y, test_x, test_y, [0.0, 2.0], [2.0, 20.0], [18.0, 20.0],
        25000, 26000, 48000, 50000)
    # add label shift
    # for intermediate images we also introduce intermediate shift with granularity of interval
    n_batches = int(inter_x.shape[0]/interval)+1
    rho_list = np.linspace(source_rho, target_rho, n_batches)
    inter_x_labelshifted = []
    inter_y_labelshifted = []
    for i in range(n_batches):
        if (i+1)*interval <= inter_x.shape[0]:
            cur_x = inter_x[i*interval:(i+1)*interval]
            cur_y = inter_y[i*interval:(i+1)*interval]
        else:
            cur_x = inter_x[i*interval:]
            cur_y = inter_y[i*interval:]
        cur_shifted_x, cur_shifted_y = tweak_one_shift(cur_x, cur_y, rho_list[i])
        inter_x_labelshifted.append(cur_shifted_x)
        inter_y_labelshifted.append(cur_shifted_y)
    inter_x_ls = np.concatenate(inter_x_labelshifted, axis=0)
    inter_y_ls = np.concatenate(inter_y_labelshifted, axis=0)
    
    # dir_inter_x and dir_inter_y are for comparison (directly self-train on target)
    dir_inter_x_ls, dir_inter_y_ls = tweak_one_shift(dir_inter_x, dir_inter_y, target_rho)

    # shift all of trg_val_x, trg_val_y, trg_test_x, trg_test_y
    trg_val_x_ls, trg_val_y_ls = tweak_one_shift(trg_val_x, trg_val_y, target_rho)
    trg_test_x_ls, trg_test_y_ls = tweak_one_shift(trg_test_x, trg_test_y, target_rho)

    return (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x_ls, inter_y_ls,
            dir_inter_x_ls, dir_inter_y_ls, trg_val_x_ls, trg_val_y_ls, trg_test_x_ls, trg_test_y_ls)
'''    

def rotated_cifar10_60_data_func_dirichlet(interval, alpha, n_classes=10):
    # interval is the granularity of x|y shift
    # x|y shift is gradual, but each step we have arbitrary label shift
    # return array of all dist vecs to get oracle training
    (train_x, train_y), (test_x, test_y) = get_preprocessed_cifar10()
    (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x, inter_y,
     dir_inter_x, dir_inter_y, trg_val_x, trg_val_y, trg_test_x, trg_test_y) = make_rotated_dataset(
        train_x, train_y, test_x, test_y, [0.0, 2.0], [2.0, 20.0], [18.0, 20.0],
        25000, 26000, 48000, 50000)
    # add label shift
    # for intermediate images, each step introduce arbitrary shift
    n_batches = int(inter_x.shape[0]/interval)
    inter_x_labelshifted = []
    inter_y_labelshifted = []
    dist_store = np.zeros((n_batches+1, n_classes))
    dist_store[0,:] = np.ones(n_classes)/n_classes # start with uniform
    for i in range(n_batches):
        cur_prob = np.random.dirichlet(np.array([alpha]*n_classes))
        dist_store[i+1,:] = cur_prob
        if (i+1)*interval <= inter_x.shape[0]:
            cur_x = inter_x[i*interval:(i+1)*interval]
            cur_y = inter_y[i*interval:(i+1)*interval]
        else:
            cur_x = inter_x[i*interval:]
            cur_y = inter_y[i*interval:]
        cur_shifted_x, cur_shifted_y = resample_class_dist(cur_x, cur_y, cur_prob)
        inter_x_labelshifted.append(cur_shifted_x)
        inter_y_labelshifted.append(cur_shifted_y)
    inter_x_ls = np.concatenate(inter_x_labelshifted, axis=0)
    inter_y_ls = np.concatenate(inter_y_labelshifted, axis=0)
    
    final_px = dist_store[-1,:]
    # dir_inter_x and dir_inter_y are for comparison (directly self-train on target)
    dir_inter_x_ls, dir_inter_y_ls = resample_class_dist(dir_inter_x, dir_inter_y, final_px)
    
    # shift all of trg_val_x, trg_val_y, trg_test_x, trg_test_y
    trg_val_x_ls, trg_val_y_ls = resample_class_dist(trg_val_x, trg_val_y, final_px)
    trg_test_x_ls, trg_test_y_ls = resample_class_dist(trg_test_x, trg_test_y, final_px)
    return (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x_ls, inter_y_ls,
            dir_inter_x_ls, dir_inter_y_ls, trg_val_x_ls, trg_val_y_ls, trg_test_x_ls, trg_test_y_ls, dist_store)



In [3]:
# training functions
def self_train_once(student, teacher, unsup_x, confidence_q=0.1, epochs=100, class_ws = None):
    # Do one bootstrapping step on unsup_x, where pred_model is used to make predictions,
    # and we use these predictions to update model.
    logits = teacher.predict(np.concatenate([unsup_x]))
    confidence = np.amax(logits, axis=1) - np.amin(logits, axis=1)
    alpha = np.quantile(confidence, confidence_q)
    indices = np.argwhere(confidence >= alpha)[:, 0]
    preds = np.argmax(logits, axis=1)
    preds = to_categorical(preds, num_classes=10)
    # apply class weights
    if class_ws is not None:
        # get class ws dict
        cw = {}
        for i in range(10):
            cw[i] = class_ws[i]
        student.fit(unsup_x[indices], preds[indices], epochs=epochs, class_weight = cw, verbose=False)
    else:
        student.fit(unsup_x[indices], preds[indices], epochs=epochs, verbose=False)

def soft_self_train_once(student, teacher, unsup_x, epochs=100):
    probs = teacher.predict(np.concatenate([unsup_x]))
    student.fit(unsup_x, probs, epochs=epochs, verbose=False)

def self_train(teacher, unsup_x, confidence_q=0.1, epochs=100, repeats=1,
               target_x=None, target_y=None, soft=False):
    accuracies = []
    student = teacher
    for i in range(repeats):
        if soft:
            soft_self_train_once(student, teacher, unsup_x, epochs)
        else:
            self_train_once(student, teacher, unsup_x, confidence_q, epochs)
        if target_x is not None and target_y is not None:
            _, accuracy = student.evaluate(target_x, target_y, verbose=True)
            accuracies.append(accuracy)
        teacher = student
    return accuracies, student

def gradual_self_train(teacher, unsup_x, debug_y, interval, confidence_q=0.1,
                       epochs=100, soft=False):
    upper_idx = int(unsup_x.shape[0] / interval)
    accuracies = []
    student = teacher
    for i in range(upper_idx):
        cur_xs = unsup_x[interval*i:interval*(i+1)]
        cur_ys = debug_y[interval*i:interval*(i+1)]
        # _, student = self_train(
        #     student_func, teacher, unsup_x, confidence_q, epochs, repeats=2)
        if soft:
            soft_self_train_once(student, teacher, cur_xs, epochs)
        else:
            self_train_once(student, teacher, cur_xs, confidence_q, epochs)
        _, accuracy = student.evaluate(cur_xs, cur_ys)
        accuracies.append(accuracy)
        teacher = student
    return accuracies, student

def gradual_corrected_self_train(teacher, unsup_x, debug_y, interval, dist_store, confidence_q=0.1,
                       epochs=100, soft=False):
    upper_idx = int(unsup_x.shape[0] / interval)
    accuracies = []
    student = teacher
    for i in range(upper_idx):
        cur_xs = unsup_x[interval*i:interval*(i+1)]
        cur_ys = debug_y[interval*i:interval*(i+1)]
       
        # weight by distance store
        class_ws = dist_store[i+1,:]/dist_store[i,:]
        # normalize
        class_ws = class_ws / np.mean(class_ws)
        
        if soft:
            soft_self_train_once(student, teacher, cur_xs, epochs)
        else:
            self_train_once(student, teacher, cur_xs, confidence_q, epochs, class_ws=class_ws)
        _, accuracy = student.evaluate(cur_xs, cur_ys)
        accuracies.append(accuracy)
        teacher = student
    return accuracies, student

In [4]:
# model
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from tensorflow.keras.optimizers import SGD

def simple_softmax_conv_model():
    model = Sequential()

    model.add(layers.Conv2D(32, (3,3), padding='same', activation='relu', input_shape=(32,32,3)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(32, (3,3), padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2,2)))
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(64, (3,3), padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(64, (3,3), padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2,2)))
    model.add(layers.Dropout(0.5))

    model.add(layers.Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2,2)))
    model.add(layers.Dropout(0.5))

    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10, activation='softmax')) 
    # compile model
    
    model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
    return model



In [5]:
def rand_seed(seed):
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

def shuffle(xs, ys):
    indices = list(range(len(xs)))
    np.random.shuffle(indices)
    return xs[indices], ys[indices]

In [14]:
def exp(seed, label_shift, tweak_one_rho = None, dirichlet_alpha = None, interval=2000, epochs=100, conf_q=0.1, n_classes=10):
    # get data
    if label_shift == "NO_LS":
        (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x, inter_y, dir_inter_x, dir_inter_y,
         trg_eval_x, trg_eval_y, trg_test_x, trg_test_y) = rotated_cifar10_60_data_func_nols()
        n_batches = int(inter_x.shape[0]/interval)
        dist_store = np.ones((n_batches+1, n_classes))
    elif label_shift == "TWEAK_ONE":
        if tweak_one_rho is None:
            print("no tweak one rho!")
            return
        (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x, inter_y, dir_inter_x, dir_inter_y,
         trg_eval_x, trg_eval_y, trg_test_x, trg_test_y) = rotated_cifar10_60_data_func_tweakone(interval, tweak_one_rho)
    elif label_shift == "DIRICHLET":
        if dirichlet_alpha is None:
            print("no dirichlet alpha!")
            return
        (src_tr_x, src_tr_y, src_val_x, src_val_y, inter_x, inter_y, dir_inter_x, dir_inter_y,
         trg_eval_x, trg_eval_y, trg_test_x, trg_test_y, diststore) = rotated_cifar10_60_data_func_dirichlet(interval, dirichlet_alpha)
        
        print(diststore)
    # set seed
    rand_seed(seed)

    # Train source model
    source_model = simple_softmax_conv_model()
    source_model.fit(src_tr_x, src_tr_y, epochs=epochs, verbose=False)
    _, src_acc = source_model.evaluate(src_val_x, src_val_y)
    _, srcmodel_acc = source_model.evaluate(trg_eval_x, trg_eval_y)

    
    # Train directly on target (oracle)
    print("\n\n Direct train on target:")
    target_model = simple_softmax_conv_model()
    target_model.fit(dir_inter_x, dir_inter_y, epochs=epochs, verbose=False)
    _, oracle_target_acc = target_model.evaluate(trg_eval_x, trg_eval_y)
    
    # comment this out bc we already know it's bad
    '''
    # self training on target (bad baseline)
    NUM_REPEATS = 1 # they repeat more in the paper, but small difference
    print("\n\n Direct self train on target:")
    teacher = simple_softmax_conv_model()
    teacher.set_weights(source_model.get_weights())
    target_accuracies, _ = self_train(teacher, dir_inter_x, epochs=epochs, target_x=trg_eval_x,
            target_y=trg_eval_y, repeats=NUM_REPEATS, confidence_q=conf_q)
    '''
    # Gradual self-training (paper baseline)
    print("\n\n Gradual self-training:")
    teacher = simple_softmax_conv_model()
    teacher.set_weights(source_model.get_weights())
    gradual_accuracies, student = gradual_self_train(teacher, inter_x, inter_y, interval, epochs=epochs,
            confidence_q=conf_q)
    _, acc = student.evaluate(trg_eval_x, trg_eval_y)
    gradual_accuracies.append(acc)
    
    # corrected gradual self training (assuming we know label shift)
    print("\n\n Gradual corrected self-training:")
    teacher = simple_softmax_conv_model()
    teacher.set_weights(source_model.get_weights())
    gradual_corrected_accuracies, student = gradual_corrected_self_train(teacher, inter_x, inter_y, interval, diststore, epochs=epochs,
            confidence_q=conf_q)
    _, acc = student.evaluate(trg_eval_x, trg_eval_y)
    gradual_corrected_accuracies.append(acc)
    

    print(f"Source model acc on target: {srcmodel_acc}")
    #print(f"direct self train on target: {target_accuracies[-1]}")
    print(f"gradual self training accuracies: {gradual_accuracies[-1]}")
    print(f"gradual labelshift corrected self training accuracies: {gradual_corrected_accuracies[-1]}")
    print(f"oracle accuracy: {oracle_target_acc}")
    return np.array([srcmodel_acc, gradual_accuracies[-1], gradual_corrected_accuracies[-1], oracle_target_acc]), diststore

In [7]:
def exp_dirichlet(seed):
    alpha = 1
    res, diststore = exp(seed, "DIRICHLET", dirichlet_alpha=alpha)
    return res, diststore
    

In [15]:
res_nols, diststore_nols = exp(1, "NO_LS")



 Direct train on target:


 Gradual self-training:


 Gradual corrected self-training:


UnboundLocalError: local variable 'diststore' referenced before assignment

In [None]:
res = np.zeros((10,5,4))
outres = open("res.txt", "ab")
shifts = open("distr.txt", "ab")
for seed in range(10):
    res, diststore = exp_dirichlet(seed)
    np.savetxt(outres, res)
    np.savetxt(shifts, diststore)
    

In [None]:
# RESULTS
# no label shift exp
train on src, eval on target: 0.717
gradual self training accuracy: 0.576
oracle accuracy: 0.815

# alpha=1 exps
# repeat 0
train on src, eval on target: 0.6760280728340149
gradual self training accuracy: 0.38766297698020935
gradual labelshift corrected self training accuracy: 0.27733200788497925
oracle (train on target) accuracy: 0.7938816547393799
# repeat 1
train on src, eval on target: 0.6609829664230347
gradual self training accuracy: 0.3154463469982147
gradual labelshift corrected self training accuracy: 0.27482447028160095
oracle accuracy: 0.8159478306770325
# repeat 2
train on src, eval on target: 0.6508015990257263
gradual self training accuracy: 0.2374749481678009
gradual labelshift corrected self training accuracy: 0.18286573886871338
oracle accuracy: 0.7990981936454773



In [19]:
res = np.array([[0.6760280728340149,0.38766297698020935,0.27733200788497925,0.7938816547393799],
[0.6609829664230347,0.3154463469982147,0.27482447028160095,0.8159478306770325],
[0.6508015990257263,0.2374749481678009,0.18286573886871338,0.7990981936454773]])
avgs = np.mean(res, axis=0)
stds = np.std(res, axis=0)
print("src model, self train, self train corrected, oracle")
print(f"avg: {avgs}")
print(f"std: {stds}")

src model, self train, self train corrected, oracle
avg: [0.66260421 0.31352809 0.24500741 0.80297589]
std: [0.01036227 0.06132901 0.04395272 0.00941653]


In [None]:
logs
alpha=1
[[1.00000000e-01 1.00000000e-01 1.00000000e-01 1.00000000e-01
  1.00000000e-01 1.00000000e-01 1.00000000e-01 1.00000000e-01
  1.00000000e-01 1.00000000e-01]
 [5.98460406e-02 2.06062929e-02 1.47128830e-01 5.35340629e-02
  3.69837384e-01 1.98609164e-01 1.87344720e-03 1.85135918e-02
  1.20694752e-01 9.35643334e-03]
 [9.86610231e-02 1.20809247e-01 1.02357901e-01 2.32138436e-02
  6.62202892e-03 1.04673658e-01 6.14302528e-02 7.34079179e-02
  4.93948312e-02 3.59429296e-01]
 [1.80272907e-02 3.38710834e-01 5.74091022e-02 2.14189091e-04
  2.76154233e-02 1.78721573e-02 5.10846860e-01 3.79673791e-03
  1.66561532e-02 8.85125138e-03]
 [3.19076616e-02 7.78511644e-02 8.60216457e-02 1.84547842e-01
  6.12788883e-05 6.67306143e-02 1.22249504e-01 2.74718568e-01
  1.43962712e-01 1.19490093e-02]
 [2.33818945e-03 8.76027612e-03 1.42729179e-01 4.44581232e-02
  5.13748578e-02 3.69529137e-01 8.82811146e-03 2.20682911e-01
  1.28443714e-01 2.28555014e-02]
 [3.70396315e-02 4.27193733e-02 2.98753893e-02 9.55501092e-02
  3.61968121e-01 1.50798260e-01 1.32115036e-01 3.65379206e-02
  1.09194466e-01 4.20169267e-03]
 [3.82825216e-02 3.47015664e-02 1.04605034e-02 3.70738473e-02
  2.35967850e-03 2.77359551e-01 1.00891389e-01 1.86744056e-01
  6.03188698e-02 2.51808018e-01]
 [1.36778149e-02 2.75106614e-01 9.48697287e-04 1.31748925e-01
  1.92103751e-01 4.26160007e-02 1.49877041e-01 7.92831861e-03
  1.69095181e-01 1.68976566e-02]
 [1.47463733e-01 4.07800109e-01 5.19735731e-02 4.90014734e-02
  4.65121983e-02 9.97861321e-02 1.31003315e-02 6.68285093e-02
  5.99581655e-02 5.75757749e-02]
 [8.70900148e-03 1.20176504e-01 3.51113682e-01 8.74317348e-02
  5.93625339e-02 6.01045512e-02 5.18997947e-02 2.04643032e-01
  1.95399615e-02 3.70192042e-02]
 [1.11995580e-01 2.83017458e-03 9.53220545e-02 3.07614750e-01
  1.43156039e-01 2.77195814e-03 2.34853268e-01 2.92612777e-02
  3.03103352e-02 4.18845632e-02]]
32/32 [==============================] - 1s 21ms/step - loss: 0.5214 - accuracy: 0.847
63/63 [==============================] - 1s 21ms/step - loss: 1.1510 - accuracy: 0.676


 Gradual corrected self-training:
63/63 [==============================] - 2s 21ms/step - loss: 1.8416 - accuracy: 0.831
63/63 [==============================] - 1s 21ms/step - loss: 1.5860 - accuracy: 0.802
63/63 [==============================] - 1s 21ms/step - loss: 2.3855 - accuracy: 0.757
63/63 [==============================] - 2s 27ms/step - loss: 2.8376 - accuracy: 0.656
63/63 [==============================] - 1s 22ms/step - loss: 2.3238 - accuracy: 0.751
63/63 [==============================] - 1s 22ms/step - loss: 4.9589 - accuracy: 0.514
63/63 [==============================] - 1s 21ms/step - loss: 3.4249 - accuracy: 0.691
63/63 [==============================] - 1s 22ms/step - loss: 4.6846 - accuracy: 0.586
63/63 [==============================] - 1s 22ms/step - loss: 4.9627 - accuracy: 0.620
63/63 [==============================] - 2s 28ms/step - loss: 9.8471 - accuracy: 0.394
63/63 [==============================] - 2s 28ms/step - loss: 10.8150 - accuracy: 0.27


 Direct train on target:
63/63 [==============================] - 2s 21ms/step - loss: 0.9522 - accuracy: 0.793


 Gradual self-training:
63/63 [==============================] - 2s 21ms/step - loss: 2.4417 - accuracy: 0.817
63/63 [==============================] - 1s 22ms/step - loss: 3.1940 - accuracy: 0.789
63/63 [==============================] - 1s 21ms/step - loss: 2.9124 - accuracy: 0.815
63/63 [==============================] - 1s 21ms/step - loss: 6.9415 - accuracy: 0.552
63/63 [==============================] - 1s 21ms/step - loss: 8.8265 - accuracy: 0.505
63/63 [==============================] - 1s 21ms/step - loss: 7.0799 - accuracy: 0.568
63/63 [==============================] - 2s 26ms/step - loss: 8.4447 - accuracy: 0.467
63/63 [==============================] - 1s 21ms/step - loss: 5.9749 - accuracy: 0.642
63/63 [==============================] - 1s 21ms/step - loss: 7.4782 - accuracy: 0.539
63/63 [==============================] - 1s 21ms/step - loss: 9.0148 - accuracy: 0.434
63/63 [==============================] - 1s 21ms/step - loss: 10.1633 - accuracy: 0.38
Source model acc on target: 0.6760280728340149
gradual self training accuracies: 0.38766297698020935
gradual labelshift corrected self training accuracies: 0.27733200788497925
oracle accuracy: 0.7938816547393799
[[1.00000000e-01 1.00000000e-01 1.00000000e-01 1.00000000e-01
  1.00000000e-01 1.00000000e-01 1.00000000e-01 1.00000000e-01
  1.00000000e-01 1.00000000e-01]
 [5.98460406e-02 2.06062929e-02 1.47128830e-01 5.35340629e-02
  3.69837384e-01 1.98609164e-01 1.87344720e-03 1.85135918e-02
  1.20694752e-01 9.35643334e-03]
 [9.86610231e-02 1.20809247e-01 1.02357901e-01 2.32138436e-02
  6.62202892e-03 1.04673658e-01 6.14302528e-02 7.34079179e-02
  4.93948312e-02 3.59429296e-01]
 [1.80272907e-02 3.38710834e-01 5.74091022e-02 2.14189091e-04
  2.76154233e-02 1.78721573e-02 5.10846860e-01 3.79673791e-03
  1.66561532e-02 8.85125138e-03]
 [3.19076616e-02 7.78511644e-02 8.60216457e-02 1.84547842e-01
  6.12788883e-05 6.67306143e-02 1.22249504e-01 2.74718568e-01
  1.43962712e-01 1.19490093e-02]
 [2.33818945e-03 8.76027612e-03 1.42729179e-01 4.44581232e-02
  5.13748578e-02 3.69529137e-01 8.82811146e-03 2.20682911e-01
  1.28443714e-01 2.28555014e-02]
 [3.70396315e-02 4.27193733e-02 2.98753893e-02 9.55501092e-02
  3.61968121e-01 1.50798260e-01 1.32115036e-01 3.65379206e-02
  1.09194466e-01 4.20169267e-03]
 [3.82825216e-02 3.47015664e-02 1.04605034e-02 3.70738473e-02
  2.35967850e-03 2.77359551e-01 1.00891389e-01 1.86744056e-01
  6.03188698e-02 2.51808018e-01]
 [1.36778149e-02 2.75106614e-01 9.48697287e-04 1.31748925e-01
  1.92103751e-01 4.26160007e-02 1.49877041e-01 7.92831861e-03
  1.69095181e-01 1.68976566e-02]
 [1.47463733e-01 4.07800109e-01 5.19735731e-02 4.90014734e-02
  4.65121983e-02 9.97861321e-02 1.31003315e-02 6.68285093e-02
  5.99581655e-02 5.75757749e-02]
 [8.70900148e-03 1.20176504e-01 3.51113682e-01 8.74317348e-02
  5.93625339e-02 6.01045512e-02 5.18997947e-02 2.04643032e-01
  1.95399615e-02 3.70192042e-02]
 [1.11995580e-01 2.83017458e-03 9.53220545e-02 3.07614750e-01
  1.43156039e-01 2.77195814e-03 2.34853268e-01 2.92612777e-02
  3.03103352e-02 4.18845632e-02]]
32/32 [==============================] - 1s 21ms/step - loss: 0.5346 - accuracy: 0.848
63/63 [==============================] - 1s 21ms/step - loss: 1.1610 - accuracy: 0.661


 Gradual corrected self-training:
63/63 [==============================] - 2s 22ms/step - loss: 2.0265 - accuracy: 0.804
63/63 [==============================] - 2s 28ms/step - loss: 1.4233 - accuracy: 0.826
63/63 [==============================] - 1s 21ms/step - loss: 0.9952 - accuracy: 0.875
63/63 [==============================] - 1s 21ms/step - loss: 4.5037 - accuracy: 0.515
63/63 [==============================] - 1s 21ms/step - loss: 3.3959 - accuracy: 0.523
63/63 [==============================] - 1s 22ms/step - loss: 7.8660 - accuracy: 0.439
63/63 [==============================] - 1s 21ms/step - loss: 7.0080 - accuracy: 0.453
63/63 [==============================] - 2s 27ms/step - loss: 7.3015 - accuracy: 0.540
63/63 [==============================] - 1s 21ms/step - loss: 6.0705 - accuracy: 0.488
63/63 [==============================] - 1s 21ms/step - loss: 13.8238 - accuracy: 0.26
63/63 [==============================] - 1s 21ms/step - loss: 14.5816 - accuracy: 0.27


 Direct train on target:
63/63 [==============================] - 1s 20ms/step - loss: 0.8018 - accuracy: 0.815


 Gradual self-training:
63/63 [==============================] - 2s 21ms/step - loss: 3.0676 - accuracy: 0.779
63/63 [==============================] - 1s 21ms/step - loss: 2.4622 - accuracy: 0.825
63/63 [==============================] - 1s 21ms/step - loss: 2.3713 - accuracy: 0.827
63/63 [==============================] - 1s 21ms/step - loss: 6.5385 - accuracy: 0.584
63/63 [==============================] - 2s 28ms/step - loss: 7.7225 - accuracy: 0.473
63/63 [==============================] - 1s 21ms/step - loss: 7.7483 - accuracy: 0.499
63/63 [==============================] - 1s 21ms/step - loss: 6.5339 - accuracy: 0.552
63/63 [==============================] - 1s 21ms/step - loss: 7.1077 - accuracy: 0.587
63/63 [==============================] - 1s 21ms/step - loss: 8.7359 - accuracy: 0.499
63/63 [==============================] - 1s 21ms/step - loss: 13.6623 - accuracy: 0.27
63/63 [==============================] - 1s 21ms/step - loss: 12.9987 - accuracy: 0.31
Source model acc on target: 0.6609829664230347
gradual self training accuracies: 0.3154463469982147
gradual labelshift corrected self training accuracies: 0.27482447028160095
oracle accuracy: 0.8159478306770325
[[0.1        0.1        0.1        0.1        0.1        0.1
  0.1        0.1        0.1        0.1       ]
 [0.00920846 0.10519726 0.24446268 0.21915013 0.078105   0.10100306
  0.11307841 0.08177611 0.03402342 0.01399547]
 [0.11388653 0.04699108 0.10161898 0.00663361 0.18645427 0.02723764
  0.29198539 0.06954231 0.07508158 0.08056861]
 [0.09438969 0.0538275  0.09765329 0.11742985 0.14192956 0.37562173
  0.05074547 0.05286978 0.01133437 0.00419877]
 [0.05876256 0.05096457 0.35305816 0.06115281 0.04001653 0.14005111
  0.05543155 0.03367892 0.1898005  0.01708329]
 [0.21238748 0.23761127 0.0420231  0.01490568 0.10287508 0.0617557
  0.15575657 0.0042202  0.06935607 0.09910885]
 [0.11334969 0.18783714 0.07462556 0.01514986 0.02015026 0.01485546
  0.20029082 0.04120549 0.09357309 0.23896264]
 [0.03895728 0.01025825 0.08367702 0.09148604 0.08526358 0.03415651
  0.29143287 0.08852153 0.25552561 0.02072131]
 [0.06583194 0.09082959 0.07238788 0.00119135 0.06649471 0.31316225
  0.16053588 0.13419333 0.05980999 0.03556308]
 [0.22319705 0.00404535 0.11741573 0.45527205 0.03985211 0.00234375
  0.03558894 0.00746535 0.07005122 0.04476847]
 [0.03260773 0.22077345 0.20031461 0.06485537 0.01845554 0.21781631
  0.13970526 0.09724501 0.00261607 0.00561064]
 [0.09363265 0.03212249 0.18023477 0.18799683 0.02432023 0.04011434
  0.09911122 0.01513478 0.09459257 0.23274011]]
32/32 [==============================] - 1s 27ms/step - loss: 0.4523 - accuracy: 0.872
63/63 [==============================] - 2s 27ms/step - loss: 1.2220 - accuracy: 0.650


 Gradual corrected self-training:
63/63 [==============================] - 2s 21ms/step - loss: 2.5289 - accuracy: 0.788
63/63 [==============================] - 1s 21ms/step - loss: 1.8929 - accuracy: 0.811
63/63 [==============================] - 1s 21ms/step - loss: 2.9649 - accuracy: 0.684
63/63 [==============================] - 1s 21ms/step - loss: 4.2602 - accuracy: 0.621
63/63 [==============================] - 1s 21ms/step - loss: 3.5444 - accuracy: 0.702
63/63 [==============================] - 2s 28ms/step - loss: 4.0961 - accuracy: 0.663
63/63 [==============================] - 1s 21ms/step - loss: 5.2019 - accuracy: 0.589
63/63 [==============================] - 1s 21ms/step - loss: 6.0464 - accuracy: 0.494
63/63 [==============================] - 1s 22ms/step - loss: 10.5697 - accuracy: 0.22
63/63 [==============================] - 1s 21ms/step - loss: 7.5025 - accuracy: 0.392
63/63 [==============================] - 1s 20ms/step - loss: 9.4568 - accuracy: 0.182


 Direct train on target:
63/63 [==============================] - 2s 21ms/step - loss: 0.8239 - accuracy: 0.799


 Gradual self-training:
63/63 [==============================] - 2s 21ms/step - loss: 2.6489 - accuracy: 0.793
63/63 [==============================] - 1s 21ms/step - loss: 2.2170 - accuracy: 0.822
63/63 [==============================] - 2s 27ms/step - loss: 4.5735 - accuracy: 0.693
63/63 [==============================] - 1s 21ms/step - loss: 5.7156 - accuracy: 0.611
63/63 [==============================] - 1s 21ms/step - loss: 4.8029 - accuracy: 0.663
63/63 [==============================] - 1s 21ms/step - loss: 4.9801 - accuracy: 0.666
63/63 [==============================] - 1s 22ms/step - loss: 5.4112 - accuracy: 0.628
63/63 [==============================] - 1s 21ms/step - loss: 7.2780 - accuracy: 0.522
63/63 [==============================] - 2s 27ms/step - loss: 16.8916 - accuracy: 0.21
63/63 [==============================] - 1s 21ms/step - loss: 11.3129 - accuracy: 0.35
63/63 [==============================] - 1s 21ms/step - loss: 12.6241 - accuracy: 0.23
Source model acc on target: 0.6508015990257263
gradual self training accuracies: 0.2374749481678009
gradual labelshift corrected self training accuracies: 0.18286573886871338
oracle accuracy: 0.7990981936454773
[[0.1        0.1        0.1        0.1        0.1        0.1
  0.1        0.1        0.1        0.1       ]
 [0.43426777 0.02548648 0.09784025 0.01016087 0.20347776 0.03905409
  0.07151477 0.0679321  0.01984771 0.03041821]
 [0.0152778  0.14329793 0.00777125 0.03368301 0.10856033 0.00424655
  0.07217006 0.24334717 0.0955263  0.27611961]
 [0.17644353 0.0071628  0.06353623 0.2734867  0.1323496  0.09032544
  0.0523811  0.09483121 0.1066804  0.002803  ]
 [0.08267022 0.01389437 0.12645167 0.10969503 0.03590553 0.10142308
  0.00588439 0.12409575 0.28276259 0.11721737]
 [0.07732292 0.03154491 0.20944527 0.0278611  0.05623777 0.24802172
  0.05195967 0.02086557 0.00774858 0.26899249]
 [0.05740667 0.03426496 0.03311653 0.02752302 0.08898477 0.12038785
  0.09554451 0.12947775 0.39384525 0.01944869]
 [0.00942974 0.01730401 0.01941036 0.06065468 0.09207203 0.08145116
  0.45367087 0.16969496 0.08213811 0.01417409]
 [0.10205719 0.01482108 0.01716312 0.23214785 0.20865439 0.01085493
  0.08700781 0.14193612 0.02780943 0.15754809]
 [0.04531372 0.00384367 0.09939776 0.15298637 0.07508423 0.00904435
  0.3178818  0.2561961  0.01719192 0.02306008]
 [0.01782723 0.29889221 0.09204134 0.20569379 0.02600816 0.05225984
  0.21971785 0.02382796 0.01250112 0.0512305 ]
 [0.10189782 0.34439464 0.08150505 0.13021925 0.01294888 0.1704595
  0.05289639 0.07382536 0.03124227 0.00061085]]
32/32 [==============================] - 1s 21ms/step - loss: 0.5191 - accuracy: 0.847
63/63 [==============================] - 1s 21ms/step - loss: 0.8420 - accuracy: 0.741


 Gradual corrected self-training:
63/63 [==============================] - 2s 21ms/step - loss: 1.4267 - accuracy: 0.859
63/63 [==============================] - 1s 21ms/step - loss: 1.3612 - accuracy: 0.851
63/63 [==============================] - 1s 21ms/step - loss: 4.7195 - accuracy: 0.590
63/63 [==============================] - 2s 27ms/step - loss: 3.3099 - accuracy: 0.669


alpha=3
[[0.1        0.1        0.1        0.1        0.1        0.1
  0.1        0.1        0.1        0.1       ]
 [0.02809297 0.11630723 0.079684   0.09803938 0.06429211 0.20740738
  0.16656009 0.09682844 0.06031113 0.08247726]
 [0.14636088 0.05364192 0.0581611  0.14510109 0.13515942 0.10033193
  0.06023741 0.08604272 0.12840842 0.08655511]
 [0.07998439 0.10002483 0.08033415 0.02605783 0.18004121 0.07131745
  0.15897384 0.14379974 0.13303226 0.0264343 ]
 [0.13353901 0.16185091 0.20553315 0.02925504 0.08088792 0.09193798
  0.11582048 0.09803076 0.05782158 0.02532317]
 [0.11115675 0.07574319 0.05471012 0.05691571 0.09436329 0.06984626
  0.13592566 0.16245463 0.17389904 0.06498535]
 [0.08618071 0.089992   0.08721382 0.1233224  0.16014676 0.0300626
  0.04865356 0.22582798 0.10337167 0.04522848]
 [0.12864227 0.01756888 0.13742367 0.04799097 0.00981324 0.10305676
  0.11804083 0.04609315 0.17034157 0.22102867]
 [0.07775026 0.08925943 0.13735609 0.12302603 0.15207733 0.12242215
  0.1251899  0.06114255 0.07740277 0.0343735 ]
 [0.19202908 0.04221039 0.12593194 0.08826102 0.03880874 0.08354088
  0.05505991 0.01333604 0.22638142 0.13444059]
 [0.0115246  0.08844235 0.0929364  0.19397308 0.09905786 0.06596863
  0.18586824 0.08572838 0.13763662 0.03886385]
 [0.04224256 0.03963463 0.15308756 0.12695038 0.13388806 0.13203123
  0.09750165 0.13342107 0.03792463 0.10331823]]
32/32 [==============================] - 1s 21ms/step - loss: 0.5214 - accuracy: 0.847
63/63 [==============================] - 1s 21ms/step - loss: 1.0489 - accuracy: 0.701


 Gradual corrected self-training:
63/63 [==============================] - 2s 22ms/step - loss: 1.8831 - accuracy: 0.846
63/63 [==============================] - 1s 22ms/step - loss: 3.2008 - accuracy: 0.754
63/63 [==============================] - 2s 27ms/step - loss: 2.8710 - accuracy: 0.755
63/63 [==============================] - 1s 21ms/step - loss: 4.6116 - accuracy: 0.684
63/63 [==============================] - 1s 21ms/step - loss: 3.9486 - accuracy: 0.696
63/63 [==============================] - 1s 21ms/step - loss: 5.6769 - accuracy: 0.610
63/63 [==============================] - 1s 21ms/step - loss: 4.2194 - accuracy: 0.683
63/63 [==============================] - 1s 21ms/step - loss: 6.1129 - accuracy: 0.569
63/63 [==============================] - 2s 27ms/step - loss: 5.0134 - accuracy: 0.663
63/63 [==============================] - 1s 22ms/step - loss: 7.9898 - accuracy: 0.532
63/63 [==============================] - 1s 22ms/step - loss: 6.2962 - accuracy: 0.483


 Direct train on target:
63/63 [==============================] - 2s 22ms/step - loss: 0.9526 - accuracy: 0.768


 Gradual self-training:
63/63 [==============================] - 2s 21ms/step - loss: 1.9425 - accuracy: 0.839
63/63 [==============================] - 1s 21ms/step - loss: 3.2295 - accuracy: 0.765
63/63 [==============================] - 1s 21ms/step - loss: 2.9066 - accuracy: 0.781
63/63 [==============================] - 1s 21ms/step - loss: 3.7881 - accuracy: 0.727
63/63 [==============================] - 1s 21ms/step - loss: 3.7017 - accuracy: 0.740
63/63 [==============================] - 2s 27ms/step - loss: 5.1212 - accuracy: 0.674
63/63 [==============================] - 1s 22ms/step - loss: 4.9778 - accuracy: 0.677
63/63 [==============================] - 1s 22ms/step - loss: 6.7238 - accuracy: 0.581
63/63 [==============================] - 1s 22ms/step - loss: 5.7740 - accuracy: 0.628
63/63 [==============================] - 1s 22ms/step - loss: 7.3486 - accuracy: 0.568
63/63 [==============================] - 1s 21ms/step - loss: 5.0898 - accuracy: 0.543
Source model acc on target: 0.7017543911933899
gradual self training accuracies: 0.5438596606254578
gradual labelshift corrected self training accuracies: 0.48370927572250366
oracle accuracy: 0.7684210538864136