# This is an example notebook for loading the GTX dataset.

In [1]:
# Register the warning filter *before* importing `dataset`. A filter only
# affects warnings raised after it is installed, so registering it after the
# import lets import-time warnings through.
import warnings
warnings.filterwarnings('ignore')

# NOTE(review): the wildcard import is kept because the cells below rely on the
# names it exports; consider replacing it with explicit imports.
from dataset import *

  from .autonotebook import tqdm as notebook_tqdm
2023-06-06 21:08:17.626769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Time-series datasets 
Below is sample code for accessing the dataset objects. The files are large, so they may take some time to load.

In [2]:
# Load the HAI training split and inspect its first item.
hai = HAIDataset(contents="train")
# Each item unpacks into three values; x.shape = a.shape = (86,)
x, y, a = hai[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    sep="\n",
)


x shape: torch.Size([86])
y is: 0
a shape: torch.Size([86])


In [3]:
# Sliding-window view over the HAI training split.
hai_slide = HAISlidingDataset(window_size=100, stride=1, contents="train")
# Each item unpacks into four values:
#   x.shape = a.shape = (99, 86); l is the last timestamp in the window.
x, y, a, l = hai_slide[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    f"l shape: {l.shape}",
    sep="\n",
)

# of valid windows: 1003808
x shape: torch.Size([99, 86])
y is: 0
a shape: torch.Size([99, 86])
l shape: torch.Size([86])


In [3]:
# Load the SWaT training split and inspect its first item.
swat = SWaTDataset(contents="train")
# Each item unpacks into three values; x.shape = a.shape = (51,)
x, y, a = swat[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    sep="\n",
)

x shape: torch.Size([51])
y is: 0
a shape: torch.Size([51])


In [9]:
# Sliding-window view over the SWaT training split.
swat_slide = SWaTSlidingDataset(window_size=100, stride=1, contents="train")
# Each item unpacks into four values:
#   x.shape = a.shape = (99, 51); l is the last timestamp in the window.
x, y, a, l = swat_slide[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    f"l shape: {l.shape}",
    sep="\n",
)

# of valid windows: 496701
x shape: torch.Size([99, 51])
y is: 0
a shape: torch.Size([99, 51])
l shape: torch.Size([51])


In [4]:
# Load the WADI training split and inspect its first item.
wadi = WADIDataset(contents="train")
# Each item unpacks into three values; x.shape = a.shape = (127,)
x, y, a = wadi[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    sep="\n",
)

x shape: torch.Size([127])
y is: 0.0
a shape: torch.Size([127])


In [10]:
# Sliding-window view over the WADI training split.
wadi_slide = WADISlidingDataset(window_size=100, stride=1, contents="train")
# Each item unpacks into four values:
#   x.shape = a.shape = (99, 127); l is the last timestamp in the window.
x, y, a, l = wadi_slide[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    f"l shape: {l.shape}",
    sep="\n",
)

# of valid windows: 1209502
x shape: torch.Size([99, 127])
y is: 0
a shape: torch.Size([99, 127])
l shape: torch.Size([127])


## Image dataset

In [13]:
# Load the training split of the MVTec "hazelnut" category at 256x256 input size.
mvtec = MVTecDataset("hazelnut", input_size=256, is_train=True)
# Each item unpacks into three values:
#   x.shape = (3, 256, 256), a.shape = (1, 256, 256)
x, y, a = mvtec[0]
print(
    f"x shape: {x.shape}",
    f"y is: {y}",
    f"a shape: {a.shape}",
    sep="\n",
)

x shape: torch.Size([3, 256, 256])
y is: 0
a shape: torch.Size([1, 256, 256])


## Text dataset

In [15]:
# Load the SQuAD training features for "roberta-base" and decode the first
# example back to text with the dataset's own tokenizer.
squad = SquadDataset("roberta-base", is_train=True)
ret = squad[0]
roberta = squad.tokenizer

# ret[6] is used as a 0/1 mask over the token ids in ret[0]:
# ones select the question tokens, zeros select the context tokens.
# NOTE(review): the ones-region appears to include padding as well — confirm.
token_ids = ret[0]
question_mask = ret[6].bool()
context_mask = (1 - ret[6]).bool()

x = roberta.decode(token_ids[context_mask])   # context
y = roberta.decode(token_ids[question_mask])  # question
# ret[3] and ret[4] bound the answer slice.
# NOTE(review): if ret[4] is an inclusive end index, this slice drops the last
# answer token — confirm against SquadDataset.
a = roberta.decode(token_ids[ret[3]:ret[4]])  # answer
print(
    f"x: {x}",
    f"y: {y}",
    f"a: {a}",
    sep="\n",
)

loading cache from data/squad/cache/train_roberta-base_384.cache
x: <s> The difference in the above factors for the case of θ=0 is the reason that most broadcasting (transmissions intended for the public) uses vertical polarization. For receivers near the ground, horizontally polarized transmissions suffer cancellation. For best reception the receiving antennas for these signals are likewise vertically polarized. In some applications where the receiving antenna must work in any position, as in mobile phones, the base station antennas use mixed polarization, such as linear polarization at an angle (with both vertical and horizontal components) or circular polarization.
y: What is one use that would require an antenna to receive signals in various ways at once?</s></s></s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

# Sample usage of the dataloaders

In [23]:
# Dataloaders for HAI: build them, then pull the train/validation loaders out
# of the returned object by key.
ret = get_hai_dataloaders(
    normalize=False,
    train_batch_size=32,
    valid_batch_size=32,
    mix_good_and_anom=True,
    train_frac=0.7,
    seed=1234,
)
train_dataloader, valid_dataloader = (
    ret["train_dataloader"],
    ret["valid_dataloader"],
)



In [24]:
# Dataloaders for SWaT sliding windows: build them, then pull the
# train/validation loaders out of the returned object by key.
ret = get_swat_sliding_dataloaders(
    window_size=100,
    stride=50,
    train_batch_size=32,
    valid_batch_size=32,
    mix_good_and_anom=True,
    train_frac=0.7,
    seed=1234,
)
train_dataloader, valid_dataloader = (
    ret["train_dataloader"],
    ret["valid_dataloader"],
)

# of valid windows: 9935
# of valid windows: 8993


In [20]:
# Dataloaders for MVTec ("all" is passed as the first argument): build them,
# then pull the train/validation loaders out of the returned object by key.
ret = get_mvtec_dataloaders(
    "all",
    train_batch_size=32,
    valid_batch_size=32,
    mix_good_and_anom=True,
    train_frac=0.7,
    seed=1234,
)
train_dataloader, valid_dataloader = (
    ret["train_dataloader"],
    ret["valid_dataloader"],
)

In [21]:
# Dataloaders for SQuAD with the "roberta-base" tokenizer: build them, then
# pull the train/validation loaders out of the returned object by key.
ret = get_squad_dataloaders(
    tokenizer_or_name="roberta-base",
    train_batch_size=32,
    valid_batch_size=32,
)
train_dataloader, valid_dataloader = (
    ret["train_dataloader"],
    ret["valid_dataloader"],
)

loading cache from data/squad/cache/train_roberta-base_384.cache
loading cache from data/squad/cache/eval_roberta-base_384.cache
