# PyKale Tutorial: Domain Adaptation on Toy Data
| [Open in Colab](https://colab.research.google.com/github/pykale/pykale/blob/main/examples/toy_domain_adaptation/tutorial.ipynb) (click `Runtime` → `Run all (Ctrl+F9)`) |  [Launch Binder](https://mybinder.org/v2/gh/pykale/pykale/HEAD?filepath=examples%2Ftoy_domain_adaptation%2Ftutorial.ipynb) (click `Run` → `Run All Cells`) |

### Setup

In [None]:
# Import seaborn first to avoid a seaborn import error caused by newer SciPy versions (to be resolved later).
import seaborn as sns

In [None]:
# Environment setup that only runs on Google Colab. Colab is detected by
# looking for 'google.colab' in the string form of the active IPython shell.
if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    # Uninstall imgaug and albumentations, then install imgaug from its GitHub repository.
    # NOTE(review): presumably a workaround for a package conflict on Colab - confirm it is still needed.
    !pip uninstall --yes imgaug && pip uninstall --yes albumentations && pip install git+https://github.com/aleju/imgaug.git
    # Install PyKale from its GitHub repository.
    !pip install git+https://github.com/pykale/pykale.git
    # Clone the PyKale repository and make this example's folder the working directory.
    !git clone https://github.com/pykale/pykale.git
    %cd pykale/examples/toy_domain_adaptation
else:
    # Nothing is installed outside Colab; only this message is printed.
    print('Not running on CoLab')

### Generate toy data

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import make_moons, make_blobs

In [None]:
# Number of points drawn for each domain.
n_samples = 1000

# Each domain is a pair of Gaussian blobs (one blob per class); the target
# blobs use different centres and slightly larger spreads than the source.
source_centers = [[0, 0], [0, 2]]
target_centers = [[2, -2], [2, 0.2]]

xs, ys = make_blobs(n_samples=n_samples, centers=source_centers, cluster_std=[0.3, 0.35])
xt, yt = make_blobs(n_samples=n_samples, centers=target_centers, cluster_std=[0.35, 0.4])

In [None]:
# Scatter plot of both domains on shared axes: colour encodes the domain,
# marker shape encodes the class ("o" for label 1, "x" for label 0).
colors = ["c", "m"]
x_all = [xs, xt]
y_all = [ys, yt]
# Capitalised consistently so the legend reads "Source ..." / "Target ...".
labels = ["Source", "Target"]
for x_dom, y_dom, color, label in zip(x_all, y_all, colors, labels):
    # Boolean masks select rows directly and yield 1-D coordinate arrays;
    # nesting the tuple returned by np.where inside the index would instead
    # produce arrays of shape (1, k).
    is_pos = y_dom == 1
    is_neg = y_dom == 0
    plt.scatter(x_dom[is_pos, 0], x_dom[is_pos, 1], c=color, marker="o", alpha=0.4,
                label=label + " positive")
    plt.scatter(x_dom[is_neg, 0], x_dom[is_neg, 1], c=color, marker="x", alpha=0.4,
                label=label + " negative")
plt.legend()
plt.title('Source domain and target domain blobs data', fontsize=14, fontweight='bold')
# Render explicitly so the cell does not echo the Text object returned by plt.title.
plt.show()

### Classification

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score
from kale.interpret.visualize import distplot_1d
from kale.pipeline.multi_domain_adapter import CoIRLS

#### Training a standard Ridge classifier

In [None]:
# Baseline without adaptation: fit a ridge classifier on the labelled source
# samples only, then score its predictions on the target samples.
clf = RidgeClassifier(alpha=1.0).fit(xs, ys)

yt_pred = clf.predict(xt)
target_accuracy = accuracy_score(yt, yt_pred)
print('Accuracy on target domain: {:.2f}'.format(target_accuracy))

In [None]:
# Decision scores of the ridge classifier for every source and target sample.
ys_score, yt_score = clf.decision_function(xs), clf.decision_function(xt)

# Plot settings; plt_labels, hist_kwargs and title_kwargs are reused by the
# domain adaptation score plot further down.
plt_labels = ["Source", "Target"]
hist_kwargs = dict(kde=True, alpha=0.7)
title_kwargs = dict(fontsize=14, fontweight="bold")
title = "Ridge classifier decision score distribution"

# Overlay the two score distributions in one figure and display it.
score_fig = distplot_1d(
    [ys_score, yt_score],
    labels=plt_labels,
    xlabel="Decision Scores",
    title=title,
    title_kwargs=title_kwargs,
    hist_kwargs=hist_kwargs,
)
score_fig.show()

#### Training a domain adaptation classifier

In [None]:
# Domain adaptation classifier with its default settings.
clf_ = CoIRLS()

# Domain indicator for every sample: 1 for the first n_samples entries and 0
# for the remaining n_samples entries, then one-hot encoded into a dense
# matrix with one row per sample.
covariates = np.concatenate((np.ones(n_samples), np.zeros(n_samples)))
enc = OneHotEncoder(handle_unknown="ignore")
domain_column = covariates.reshape(-1, 1)
covariates_mat = enc.fit_transform(domain_column).toarray()

In [None]:
# Stack source rows on top of target rows. Labels are passed for the source
# samples only (ys); the domain covariates cover every row of x.
x = np.vstack((xs, xt))
clf_.fit(x, ys, covariates_mat)

yt_pred_ = clf_.predict(xt)
target_accuracy_ = accuracy_score(yt, yt_pred_)
print("Accuracy on target domain: {:.2f}".format(target_accuracy_))

In [None]:
# Decision scores of the domain adaptation classifier. decision_function
# returns a tensor here, so detach it, convert to NumPy and flatten to 1-D.
ys_score_ = clf_.decision_function(xs).detach().numpy().reshape(-1)
yt_score_ = clf_.decision_function(xt).detach().numpy().reshape(-1)
title = "Domain adaptation classifier decision score distribution"
# distplot_1d opens its own figure, so a preceding plt.figure(figsize=...)
# call would only leave an empty extra figure behind without resizing the
# plot; the requested size is passed through the figsize argument instead.
distplot_1d(
    [ys_score_, yt_score_],
    title=title,
    xlabel="Decision Scores",
    labels=plt_labels,
    figsize=(10, 5),
    hist_kwargs=hist_kwargs,
    title_kwargs=title_kwargs,
).show()