In [96]:
#!/usr/bin/env python3
import argparse

import numpy as np
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import torch
import torch.nn as nn
import torch.optim as optim

import pprint
# Shared pretty-printer instance for ad-hoc inspection of values.
pp = pprint.PrettyPrinter()

# Command-line configuration of the experiment. Every option has a default,
# so the script also runs when no arguments are supplied.
parser = argparse.ArgumentParser()
# These arguments will be set appropriately by ReCodEx, even if you change them.
parser.add_argument("--batch_size", default=10, type=int, help="Batch size")
parser.add_argument("--classes", default=5, type=int, help="Number of classes to use")
parser.add_argument("--data_size", default=200, type=int, help="Data size")
parser.add_argument("--epochs", default=2, type=int, help="Number of SGD training epochs")
parser.add_argument("--learning_rate", default=0.01, type=float, help="Learning rate")
parser.add_argument("--recodex", default=False, action="store_true", help="Running in ReCodEx")
parser.add_argument("--seed", default=42, type=int, help="Random seed")
# `--test_size` is parsed as an int when the string consists only of digits
# and as a float otherwise.
parser.add_argument("--test_size", default=0.5, type=lambda x: int(x) if x.isdigit() else float(x), help="Test size")
# If you add more arguments, ReCodEx will keep them with your default values.
# When `__file__` is not defined (e.g. in an interactive session), an empty
# argument list is parsed so that only the defaults apply; otherwise `None`
# is passed and argparse reads the process command line.
args = parser.parse_args([] if "__file__" not in globals() else None)   

In [97]:
# Seeded NumPy random generator, shared by the rest of the script.
generator = np.random.RandomState(args.seed)

# Build a synthetic multi-label classification dataset. Every example gets
# at least one label (`allow_unlabeled=False`), and the labels come back as
# one list of class indices per example (`return_indicator=False`).
data, target_list = sklearn.datasets.make_multilabel_classification(
    n_samples=args.data_size,
    n_classes=args.classes,
    allow_unlabeled=False,
    return_indicator=False,
    random_state=args.seed,
)

# Convert the per-example class lists to a dense n-hot matrix with one row
# per example and one binary indicator column per class.
num_examples = len(target_list)
target = np.zeros((num_examples, args.classes), dtype=np.float32)

# Set the indicator of every class listed for an example to 1, e.g. with
# four classes the label list [0, 2] produces the row [1, 0, 1, 0].
for row, labels in enumerate(target_list):
    target[row, labels] = 1.0

In [110]:
# Append a constant feature with value 1 to the end of all input data.
# NOTE: this rebinds `data`, so re-running the cell appends another column.
data = np.pad(data, [(0, 0), (0, 1)], constant_values=1)

# Split the dataset into a train set and a test set, reproducibly with
# respect to `args.seed`.
train_data, test_data, train_target, test_target = sklearn.model_selection.train_test_split(
    data, target, test_size=args.test_size, random_state=args.seed)

# Convert the NumPy splits to float32 tensors for the torch model and loss.
train_data = torch.tensor(train_data, dtype=torch.float32)
test_data = torch.tensor(test_data, dtype=torch.float32)
train_target = torch.tensor(train_target, dtype=torch.float32)
test_target = torch.tensor(test_target, dtype=torch.float32)

# Seed torch's global RNG so that the randomly initialised model parameters
# are the same on every run; previously only NumPy/sklearn were seeded.
torch.manual_seed(args.seed)

# NOTE(review): `MultilayerPerceptron` is not defined in any cell visible here
# (only `LogisticRegression` is, in a later cell) -- confirm where it comes from.
# The weight initialisation interval is left to the module's own defaults.
model = MultilayerPerceptron(train_data.shape[1], 100 , args.classes)
# The loss works on raw logits, so the model must not apply a sigmoid itself.
loss_f = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)

# NumPy weight matrix from the original template. The torch model does not
# use it; the call is kept because it advances `generator`, whose state
# determines the batch permutations drawn during training.
weights = generator.uniform(size=[train_data.shape[1], args.classes], low=-0.1, high=0.1)

# Per-class confusion counters for the train set.
TP = torch.zeros(args.classes, dtype=torch.long)
TN = torch.zeros(args.classes, dtype=torch.long)
FP = torch.zeros(args.classes, dtype=torch.long)
FN = torch.zeros(args.classes, dtype=torch.long)

# Per-class confusion counters for the test set.
test_TP = torch.zeros(args.classes, dtype=torch.long)
test_TN = torch.zeros(args.classes, dtype=torch.long)
test_FP = torch.zeros(args.classes, dtype=torch.long)
test_FN = torch.zeros(args.classes, dtype=torch.long)

In [111]:
# Mini-batch SGD training followed by an F1 evaluation after every epoch.
# NOTE(review): `f1_micro_macro` is defined in a later cell; that cell has to
# be executed before this one.
for epoch in range(args.epochs):
    # Fresh random visiting order of the training examples for this epoch.
    permutation = generator.permutation(train_data.shape[0])

    # Process the data in the order of `permutation`, one batch of
    # `args.batch_size` examples per optimizer step.
    # It is assumed that `args.batch_size` exactly divides `train_data.shape[0]`.
    b_size = args.batch_size
    for i in range(0, train_data.shape[0], b_size):
        indices = permutation[i : i + b_size]
        batch_train = train_data[indices]
        batch_target = train_target[indices]

        optimizer.zero_grad()
        pred = model(batch_train)
        # `loss_f` averages the loss, so the gradient is the batch average.
        loss = loss_f(pred, batch_target)
        loss.backward()
        optimizer.step()

    # After the SGD epoch, compute the micro-averaged and the macro-averaged
    # F1-score for both the train set and the test set, manually and without
    # using `sklearn.metrics`. A label is predicted when its probability is
    # at least 0.5; no gradients are needed for evaluation.
    with torch.no_grad():
        train_pred = torch.sigmoid(model(train_data)) >= 0.5
        test_pred = torch.sigmoid(model(test_data)) >= 0.5

    # Gold targets go first and predictions second, matching the
    # `f1_micro_macro(y_true, y_pred)` signature.
    train_f1_micro, train_f1_macro = f1_micro_macro(train_target, train_pred)
    test_f1_micro, test_f1_macro = f1_micro_macro(test_target, test_pred)

    # Per-epoch report, currently disabled:
    # print("After epoch {}: train F1 micro {:.2f}% macro {:.2f}%, test F1 micro {:.2f}% macro {:.2f}%".format(
    #     epoch + 1, 100 * train_f1_micro, 100 * train_f1_macro, 100 * test_f1_micro, 100 * test_f1_macro))

# Train-set scores after the last epoch.
print(train_f1_micro.item())
print(train_f1_macro.item())

RuntimeError: one_hot is only applicable to index tensor of type LongTensor.

In [112]:
class LogisticRegression(nn.Module):
    """Linear multi-label classifier that outputs one raw logit per class."""

    def __init__(self, in_features, num_classes):
        """Create the single linear layer mapping `in_features` inputs to `num_classes` logits."""
        super().__init__()
        self.linear = nn.Linear(in_features=in_features, out_features=num_classes)

    def forward(self, x):
        """Return the logits for the batch `x`."""
        # The sigmoid is deliberately left out: BCEWithLogitsLoss applies it
        # internally, so the model has to return unnormalised scores.
        logits = self.linear(x)
        return logits

In [113]:
def f1_micro_macro(y_true, y_pred):
    """Compute the micro- and macro-averaged F1 score of multi-label predictions.

    Args:
        y_true: tensor of gold indicators, one row per example and one column
            per class; any dtype convertible with `.bool()`.
        y_pred: tensor of predicted indicators with the same layout.

    Returns:
        A pair `(f1_micro, f1_macro)` of scalar float tensors. The micro score
        pools the counts of all classes before computing F1; the macro score
        is the unweighted mean of the per-class F1 scores. An F1 whose
        denominator is zero (no gold and no predicted positives) counts as 0.
    """
    y_true = y_true.bool()
    y_pred = y_pred.bool()

    # Per-class confusion counts, summed over the examples (dim 0).
    # True negatives are not needed for F1.
    tp = (y_pred & y_true).sum(dim=0)
    fp = (y_pred & ~y_true).sum(dim=0)
    fn = (~y_pred & y_true).sum(dim=0)

    def _f1(tp_count, fp_count, fn_count):
        """F1 = 2TP / (2TP + FP + FN), evaluated as 0 when the denominator is 0."""
        denominator = 2 * tp_count + fp_count + fn_count
        # A zero denominator implies a zero numerator, so clamping the
        # denominator to 1 yields 0 instead of NaN.
        return (2 * tp_count) / denominator.clamp(min=1)

    f1_micro = _f1(tp.sum(), fp.sum(), fn.sum())
    f1_macro = _f1(tp, fp, fn).mean()

    return f1_micro, f1_macro