In [1]:
from datastore.data import P3B3
from datastore.data import RandomData
from datastore.data import KuzushijiMNIST
from datastore.data import RandomMultiTaskData
from datastore.sampling import leave_one_out_bootstrap

from torch.utils.data import DataLoader

In [2]:
# Random dataset (single task): 10 samples, 2 classes.
# NOTE(review): seed=None presumably leaves the generator unseeded, so the
# values printed further down would change on every run — confirm against
# RandomData and pass a fixed seed if reproducible output is wanted.
random_data = RandomData(num_samples=10, num_classes=2, seed=None)
# Draw 2 leave-one-out bootstrap samples; the cells below read `.train` and
# `.test` from each element of the result.
random_samples = leave_one_out_bootstrap(random_data, num_bootstraps=2)

In [3]:
# Random dataset (multitask): 10 samples, 2 tasks, 2 classes.
# NOTE(review): seed=None presumably leaves the generator unseeded, so the
# values printed further down would change on every run — confirm against
# RandomMultiTaskData and pass a fixed seed if reproducible output is wanted.
random_multitask_data = RandomMultiTaskData(num_samples=10, num_tasks=2, num_classes=2, seed=None)
# Draw 2 leave-one-out bootstrap samples; the cells below read `.train` and
# `.test` from each element of the result.
random_multitask_samples = leave_one_out_bootstrap(random_multitask_data, num_bootstraps=2)

In [4]:
def get_dataloaders(sample):
    """Wrap both splits of a bootstrap sample in single-item DataLoaders.

    Args:
        sample: Object exposing ``train`` and ``test`` attributes, each of
            which is handed to ``DataLoader`` as the dataset.

    Returns:
        tuple: ``(trainloader, validloader)`` built from ``sample.train`` and
        ``sample.test`` respectively, both with ``batch_size=1`` and no other
        options set.
    """
    # Train first, then test, so the tuple order matches what callers unpack.
    split_names = ('train', 'test')
    return tuple(
        DataLoader(getattr(sample, split), batch_size=1)
        for split in split_names
    )

In [29]:
def get_set(dataloader, verbose=False):
    """Collect the distinct scalar data values yielded by a dataloader.

    Args:
        dataloader: Iterable of ``(data, target)`` pairs whose ``data``
            exposes ``.item()``. It must also support ``len()`` when
            ``verbose`` is true.
        verbose: When true, print the loader length first and then, for every
            batch, the scalar value followed by the raw ``data`` object.
            When false, nothing is printed.

    Returns:
        set: The unique ``data.item()`` values seen; repeated values collapse
        into a single entry.
    """
    if verbose:
        print(f'number of samples: {len(dataloader)}')

    elems = set()
    # The target is not needed to identify a sample, so it is ignored.
    for data, _target in dataloader:
        value = data.item()
        elems.add(value)

        # All diagnostics sit behind `verbose`, so a non-verbose call is
        # silent; a verbose call prints the same lines in the same order as
        # before (scalar first, then the raw data object).
        if verbose:
            print(value)
            print(data)

    return elems

In [30]:
def inspect_sample(sample, verbose=False):
    """Report which data values one bootstrap sample shares across its splits.

    Args:
        sample: Object accepted by ``get_dataloaders`` (it must expose
            ``train`` and ``test``).
        verbose: Forwarded to ``get_set``; when true a heading is also
            printed before each split.

    Returns:
        None. The intersection of the two value sets is always printed.
    """
    train_loader, test_loader = get_dataloaders(sample)
    sections = (
        (f'Training set\n', train_loader),
        (f'\nTest set\n', test_loader),
    )

    # Walk the splits in train -> test order, emitting the heading (if asked)
    # right before that split's values are collected.
    collected = []
    for heading, loader in sections:
        if verbose:
            print(heading)
        collected.append(get_set(loader, verbose))

    train_values, test_values = collected
    overlap = train_values & test_values
    print(f'\nIntersection of training and test sets: {overlap}')

In [31]:
def inspect_all_samples(samples, verbose=False):
    """Run ``inspect_sample`` on every bootstrap sample, with a numbered header.

    Args:
        samples: Iterable of sample objects accepted by ``inspect_sample``.
        verbose: Passed through unchanged to ``inspect_sample``.

    Returns:
        None. A ``Sample <n>`` header (numbered from 0) is printed before each
        sample's report.
    """
    for number, bootstrap in enumerate(samples):
        header = f'\nSample {number}'
        print(header)
        inspect_sample(bootstrap, verbose)

In [32]:
# Single-task random data: print every value in each bootstrap's train and
# test split (verbose=True), followed by the intersection of the two splits.
inspect_all_samples(random_samples, verbose=True)


Sample 0
Training set

number of samples: 5
2.6832984987629867
tensor([2.6833], dtype=torch.float64)
0.9577038977483122
tensor([0.9577], dtype=torch.float64)
0.33135903638787345
tensor([0.3314], dtype=torch.float64)
1.2582663134708885
tensor([1.2583], dtype=torch.float64)
1.2582663134708885
tensor([1.2583], dtype=torch.float64)

Test set

number of samples: 6
-1.0287581918171989
tensor([-1.0288], dtype=torch.float64)
1.4799927651470475
tensor([1.4800], dtype=torch.float64)
-1.0259006695060389
tensor([-1.0259], dtype=torch.float64)
-2.3434433491451214
tensor([-2.3434], dtype=torch.float64)
-0.6387077316623777
tensor([-0.6387], dtype=torch.float64)
0.5758117840230111
tensor([0.5758], dtype=torch.float64)

Intersection of training and test sets: set()

Sample 1
Training set

number of samples: 5
-1.0259006695060389
tensor([-1.0259], dtype=torch.float64)
2.6832984987629867
tensor([2.6833], dtype=torch.float64)
1.2582663134708885
tensor([1.2583], dtype=torch.float64)
2.6832984987629867
ten

In [33]:
# Multitask random data: print every value in each bootstrap's train and
# test split (verbose=True), followed by the intersection of the two splits.
inspect_all_samples(random_multitask_samples, verbose=True)


Sample 0
Training set

number of samples: 5
0.6521309001923868
tensor([0.6521], dtype=torch.float64)
0.19561316850635524
tensor([0.1956], dtype=torch.float64)
0.5364819254933619
tensor([0.5365], dtype=torch.float64)
0.2530948608866341
tensor([0.2531], dtype=torch.float64)
-1.006049656140366
tensor([-1.0060], dtype=torch.float64)

Test set

number of samples: 5
0.8902146910687551
tensor([0.8902], dtype=torch.float64)
1.1797170972411
tensor([1.1797], dtype=torch.float64)
-1.1006763988577402
tensor([-1.1007], dtype=torch.float64)
0.6598202935959029
tensor([0.6598], dtype=torch.float64)
0.3384112573869376
tensor([0.3384], dtype=torch.float64)

Intersection of training and test sets: set()

Sample 1
Training set

number of samples: 5
0.8902146910687551
tensor([0.8902], dtype=torch.float64)
0.6521309001923868
tensor([0.6521], dtype=torch.float64)
0.2530948608866341
tensor([0.2531], dtype=torch.float64)
0.3384112573869376
tensor([0.3384], dtype=torch.float64)
0.3384112573869376
tensor([0.338