In [133]:
import numpy as np 
import torch 
import torch.optim as optim 
import torch.nn as nn 
import torch.functional as F
from  torch.utils.data import DataLoader , TensorDataset  
from sklearn.datasets import make_moons 
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix, roc_curve , precision_recall_curve , auc 
import os 
import sys 

In [134]:
%run -i generate_dataset.py

In [135]:
image , lables = generate_dataset(img_size= 5 , 
                                  n_images = 300 , 
                                  binary = True , 
                                  seed = 13)

In [136]:
#data preparaation 
x_tensor = torch.as_tensor(image/255)
y_tensor = torch.as_tensor(lables.reshape(-1 , 1)).float()
x_tensor.shape , y_tensor.shape

(torch.Size([300, 1, 5, 5]), torch.Size([300, 1]))

In [177]:
#data transformation 
from torch.utils.data import Dataset 
class TransformaTensorDataset(Dataset):
    def __init__(self , x ,y , transform = None ):
        self.x = x 
        self.y = y 
        self.transform  = transform 
        
    def __getitem__(self, index):
        x_item = self.x[index]
        y_item = self.y[index]

        if self.transform:
            x_item = self.transform(x_item)

        return x_item , y_item
    
    def __len__(self):
        return len(self.x)


In [178]:
from torchvision import datasets , transforms

In [179]:
composer = transforms.Compose([transforms.RandomHorizontalFlip(p = 0.5 ) ,
                               transforms.Normalize(mean=(.5) , std = (.5))])
datasets = TransformaTensorDataset(x_tensor , y_tensor , composer )

In [180]:
from torch.utils.data import random_split 

In [181]:
#index splitter 
def index_splitter(n , splits , seed = 13):
    idx = torch.arange(n)
    splits = torch.as_tensor(splits)
    multiplyer = n / splits.sum()
     
    splits = (multiplyer * splits).long()
    diff = n - splits.sum() 
    splits[0] = splits[0]   + diff 

    torch.manual_seed(42)
    return random_split(idx , splits)

    

In [182]:
train_idx , val_idx = index_splitter(len(x_tensor ) , [80 , 20])

In [183]:
train_idx.indices

[42,
 61,
 286,
 160,
 30,
 265,
 172,
 205,
 218,
 86,
 38,
 50,
 182,
 94,
 49,
 22,
 23,
 71,
 237,
 208,
 14,
 65,
 63,
 149,
 263,
 113,
 295,
 273,
 187,
 56,
 0,
 250,
 127,
 274,
 151,
 209,
 24,
 10,
 230,
 276,
 106,
 231,
 211,
 201,
 36,
 53,
 223,
 155,
 165,
 17,
 120,
 216,
 238,
 148,
 3,
 13,
 259,
 152,
 6,
 110,
 206,
 128,
 40,
 247,
 270,
 166,
 171,
 194,
 103,
 181,
 267,
 1,
 76,
 196,
 134,
 284,
 191,
 256,
 80,
 102,
 294,
 157,
 176,
 156,
 62,
 5,
 72,
 285,
 91,
 163,
 277,
 28,
 87,
 261,
 137,
 150,
 229,
 121,
 129,
 161,
 170,
 275,
 219,
 212,
 249,
 39,
 239,
 235,
 97,
 131,
 115,
 174,
 107,
 52,
 266,
 33,
 116,
 81,
 251,
 138,
 179,
 45,
 101,
 60,
 19,
 204,
 48,
 297,
 243,
 12,
 18,
 67,
 68,
 44,
 260,
 268,
 74,
 192,
 55,
 57,
 29,
 70,
 207,
 9,
 175,
 142,
 125,
 245,
 180,
 117,
 58,
 167,
 146,
 193,
 242,
 287,
 258,
 292,
 190,
 26,
 123,
 221,
 199,
 215,
 126,
 173,
 291,
 188,
 111,
 178,
 92,
 296,
 64,
 144,
 213,
 75,
 262,
 10

In [184]:
from torch.utils.data import SubsetRandomSampler

In [185]:
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

In [186]:
#build a loader of each sets 

train_loader = DataLoader(dataset= datasets  , 
                          batch_size = 16 , sampler = train_sampler )

val_loader = DataLoader(dataset = datasets , batch_size = 16 , sampler = val_sampler)



In [187]:
 #check if loaders are returning corect number of mini -batches 
len(iter(train_loader))

15

In [188]:
len(iter(val_loader))

4

### data augumentation 

In [189]:
x_train_tensor  =  x_tensor[train_idx]
x_val_tensor  =  x_tensor[val_idx]
y_train_tensor  =  y_tensor[train_idx]
y_val_tensor  =  y_tensor[val_idx]


In [190]:
#composer 
train_composer = transforms.Compose([transforms.RandomHorizontalFlip(p = .5), 
                                                                     transforms.Normalize(mean = (.5) , std = (.5))])

val_composer = transforms.Compose([transforms.RandomHorizontalFlip(p = .5) , transforms.Normalize(mean = (.5) , std = (.5))])


In [191]:
#create their corresponding data loader

train_dataset = TransformaTensorDataset( x_train_tensor , y_train_tensor , transform = train_composer     
)

val_dataset = TransformaTensorDataset(x_val_tensor , y_val_tensor , transform= val_composer)


In [192]:
#builds a loader of each set 

train_loader =  DataLoader(
    dataset  = train_dataset , batch_size = 16 , shuffle= True 
)
val_loader = DataLoader(dataset= val_dataset , batch_size = 16)

In [193]:
from torch.utils.data import WeightedRandomSampler

In [194]:
classes , counts = y_train_tensor.unique(return_counts = True)

In [195]:
1.0 / counts.float()

tensor([0.0119, 0.0064])

In [196]:
weights = 1.0 / counts.float()
sample_weights = weights[y_train_tensor.squeeze().long()]

In [197]:
generator = torch.Generator()
sampler = WeightedRandomSampler(
  weights=sample_weights,
  num_samples=len(sample_weights),
  generator=generator,
  replacement=True
)


In [198]:
#again 
train_loader = DataLoader(
    dataset= train_dataset , batch_size = 16 , sampler = sampler 
)
val_loader = DataLoader(dataset= val_dataset , batch_size= 16)


In [199]:
sampler = make_balanced_sampler(y_train_tensor)

In [200]:
def make_balanced_sampler(y):
    classes ,counts = y.unique(return_counts = True)
    weights = 1.0 /counts.float()
    sample_weights  = weights[y.squeeze().long()]
    #build sampler with compute weights
    
    generator = torch.Generator()
    sampler = WeightedRandomSampler(
        weights = sample_weights , 
        num_samples= len(sample_weights), 
        generator = generator ,  
        replacement = True 
    )
    return sampler 

In [201]:
([t[1].sum() for t in iter(train_loader)])

[tensor(9.),
 tensor(8.),
 tensor(9.),
 tensor(6.),
 tensor(5.),
 tensor(7.),
 tensor(7.),
 tensor(10.),
 tensor(8.),
 tensor(3.),
 tensor(6.),
 tensor(5.),
 tensor(9.),
 tensor(9.),
 tensor(7.)]

In [None]:
def set_seed(self , seed = 42):
    torch.backends.cudnn.deterministic = True 
    torch.backends.cudnn.benchmark = False 
    torch.manual_seed(seed)
    

In [28]:
a = 0 
for i in val_idx:
    print(i)
    a = a + 1

a 

tensor(241)
tensor(41)
tensor(222)
tensor(186)
tensor(282)
tensor(4)
tensor(77)
tensor(82)
tensor(158)
tensor(159)
tensor(234)
tensor(145)
tensor(69)
tensor(283)
tensor(95)
tensor(27)
tensor(244)
tensor(21)
tensor(93)
tensor(164)
tensor(25)
tensor(246)
tensor(66)
tensor(112)
tensor(2)
tensor(124)
tensor(279)
tensor(83)
tensor(141)
tensor(54)
tensor(43)
tensor(139)
tensor(200)
tensor(177)
tensor(16)
tensor(89)
tensor(198)
tensor(254)
tensor(35)
tensor(183)
tensor(90)
tensor(298)
tensor(169)
tensor(257)
tensor(154)
tensor(84)
tensor(109)
tensor(132)
tensor(15)
tensor(253)
tensor(202)
tensor(281)
tensor(96)
tensor(289)
tensor(46)
tensor(98)
tensor(7)
tensor(78)
tensor(20)
tensor(210)


60

In [27]:
a = 0 
for i in train_idx:
    print(i)
    a = a + 1

a 

tensor(42)
tensor(61)
tensor(286)
tensor(160)
tensor(30)
tensor(265)
tensor(172)
tensor(205)
tensor(218)
tensor(86)
tensor(38)
tensor(50)
tensor(182)
tensor(94)
tensor(49)
tensor(22)
tensor(23)
tensor(71)
tensor(237)
tensor(208)
tensor(14)
tensor(65)
tensor(63)
tensor(149)
tensor(263)
tensor(113)
tensor(295)
tensor(273)
tensor(187)
tensor(56)
tensor(0)
tensor(250)
tensor(127)
tensor(274)
tensor(151)
tensor(209)
tensor(24)
tensor(10)
tensor(230)
tensor(276)
tensor(106)
tensor(231)
tensor(211)
tensor(201)
tensor(36)
tensor(53)
tensor(223)
tensor(155)
tensor(165)
tensor(17)
tensor(120)
tensor(216)
tensor(238)
tensor(148)
tensor(3)
tensor(13)
tensor(259)
tensor(152)
tensor(6)
tensor(110)
tensor(206)
tensor(128)
tensor(40)
tensor(247)
tensor(270)
tensor(166)
tensor(171)
tensor(194)
tensor(103)
tensor(181)
tensor(267)
tensor(1)
tensor(76)
tensor(196)
tensor(134)
tensor(284)
tensor(191)
tensor(256)
tensor(80)
tensor(102)
tensor(294)
tensor(157)
tensor(176)
tensor(156)
tensor(62)
tensor(5)
ten

240