In [None]:
import pickle
import numpy as np
from skimage import io
import torch

from tqdm import tqdm, tqdm_notebook
from PIL import Image
from pathlib import Path

from torchvision import transforms
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

from matplotlib import colors, pyplot as plt
%matplotlib inline

# sklearn is not entirely tidy here; to keep image output in Colab readable
# we ignore these warnings
import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [None]:
# Prefer the first CUDA device when one is available; otherwise use the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
from google.colab import drive
# Mount Google Drive under /content/gdrive: the unzip and `ls` cells of this
# notebook read from the `gdrive` folder, so the mount point has to match.
drive.mount('/content/gdrive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Ask for a manual file upload through google.colab's `files.upload()` and
# keep whatever it returns in `uploaded`.
# NOTE(review): the recorded run of this cell was interrupted
# (KeyboardInterrupt) and `uploaded` is not used in the visible part of the
# notebook — confirm whether this cell is still needed.
from google.colab import files
uploaded = files.upload()

KeyboardInterrupt: ignored

In [None]:
!unzip -q /content/gdrive/My\ Drive/simpsons/data/dataset.zip -d train
!unzip -q /content/gdrive/My\ Drive/simpsons/data/testset.zip -d test

unzip:  cannot find or open /content/gdrive/My Drive/simpsons/data/dataset.zip, /content/gdrive/My Drive/simpsons/data/dataset.zip.zip or /content/gdrive/My Drive/simpsons/data/dataset.zip.ZIP.
unzip:  cannot find or open /content/gdrive/My Drive/simpsons/data/testset.zip, /content/gdrive/My Drive/simpsons/data/testset.zip.zip or /content/gdrive/My Drive/simpsons/data/testset.zip.ZIP.


In [None]:
# List the contents of gdrive/MyDrive (path is relative to the current working directory).
!ls gdrive/MyDrive

'1 сентября'		  'Stany Zjednoczone.gslides'
'Colab Notebooks'	   train
 Cukrzyca.gslides	  'Деревня Вилли'
'Franchising i SOB.gdoc'  'Как создавать запоминающиеся презентации (1).gslides'
 MNIST.ipynb		  'Как создавать запоминающиеся презентации.gslides'
 seminar_12.ipynb	   форд


In [None]:
# List the contents of the sample_data folder in the current working directory.
!ls sample_data

anscombe.json		     california_housing_train.csv  mnist_train_small.csv
california_housing_test.csv  mnist_test.csv		   README.md


In [None]:
# List the top-level entries of the gdrive folder.
!ls gdrive

MyDrive


In [None]:
# Modes accepted by SimpsonDataset; any other value is rejected in its constructor.
DATA_MODES = ['train', 'val', 'test']

In [None]:
# Print the GPU status table reported by nvidia-smi for this runtime.
!nvidia-smi

Fri Dec  1 19:02:04 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P8    11W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Side length, in pixels, that SimpsonDataset._prepare_sample resizes every image to (square).
# NOTE(review): 244 may be a typo for 224 — confirm the intended input size.
RESCALE_SIZE = 244

In [None]:
# Use CUDA when it is available and fall back to the CPU otherwise, so the
# notebook still runs on a runtime without a GPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class SimpsonDataset(Dataset):
  """Image dataset that loads, resizes and normalizes files on access.

  In 'train' and 'val' mode the label of a file is the name of its parent
  directory. Labels are encoded with a ``LabelEncoder`` that is fitted when
  the dataset is constructed and pickled to ``label_encoder.pkl``. In 'test'
  mode no labels are read and only the image tensor is returned.
  """

  def __init__(self, files, mode):
    """Store the sorted file list and, outside 'test' mode, fit the label encoder.

    Args:
      files: iterable of path objects; ``path.parent.name`` is read as the
        label, and the files are kept in sorted order.
      mode: one of ``DATA_MODES``.

    Raises:
      NameError: if ``mode`` is not one of ``DATA_MODES``.
    """
    super().__init__()

    if mode not in DATA_MODES:
      # Same exception type as before, but the explanation now travels with it.
      raise NameError(f"{mode} is not correct; correct modes: {DATA_MODES}!")

    self.files = sorted(files)
    self.mode = mode
    self.len_ = len(self.files)

    self.label_encoder = LabelEncoder()

    # Built once instead of on every __getitem__ call. The mean/std constants
    # are the same ones imshow() uses to undo the normalization.
    self.transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    if self.mode != 'test':
      self.labels = [path.parent.name for path in self.files]
      self.label_encoder.fit(self.labels)

      # Persist the fitted encoder so ids can be mapped back to label names later.
      with open('label_encoder.pkl', 'wb') as le_dump_file:
        pickle.dump(self.label_encoder, le_dump_file)

  # The methods below were previously indented inside __init__, which made
  # them local functions of the constructor rather than methods of the class.

  def __len__(self):
    """Return the number of files in the dataset."""
    return self.len_

  def load_sample(self, file):
    """Open ``file`` with PIL, force the pixel data to be read, and return the image."""
    image = Image.open(file)
    image.load()
    return image

  def __getitem__(self, index):
    """Return the transformed image at ``index``.

    Returns:
      The image tensor alone in 'test' mode, otherwise a tuple
      ``(tensor, label_id)`` where ``label_id`` is the encoded label.
    """
    image = self.load_sample(self.files[index])
    pixels = self._prepare_sample(image)
    # Scale pixel values by 1/255 by hand: a float32 array is handed to the
    # transform, not the raw image.
    pixels = np.array(pixels / 255, dtype='float32')
    x = self.transform(pixels)

    if self.mode == 'test':
      return x

    label_id = self.label_encoder.transform([self.labels[index]])
    return x, label_id.item()

  def _prepare_sample(self, image):
    """Resize ``image`` to RESCALE_SIZE x RESCALE_SIZE and return it as a numpy array."""
    image = image.resize((RESCALE_SIZE, RESCALE_SIZE))
    return np.array(image)

In [None]:
def imshow(inp, title = None, plt_ax = plt, default = None):
  """Draw a normalized image tensor with matplotlib.

  The per-channel normalization (mean/std constants below) is undone and the
  values are clipped to [0, 1] before drawing.

  Args:
    inp: object exposing ``.numpy()``; the resulting array has its axes
      reordered with ``(1, 2, 0)`` before display.
    title: optional title; nothing is set when it is None.
    plt_ax: drawing target, either the ``pyplot`` module (the default) or an
      Axes-like object.
    default: unused; kept so that existing calls keep working.
  """
  image = inp.numpy().transpose((1, 2, 0))
  mean = np.array([0.485, 0.456, 0.406])
  std = np.array([0.229, 0.224, 0.225])
  # Invert the normalization, then clamp to the displayable range.
  image = np.clip(std * image + mean, 0, 1)
  plt_ax.imshow(image)
  if title is not None:
    # Axes objects expose set_title(); the pyplot module only has title(),
    # so the default plt_ax used to fail here with an AttributeError.
    if hasattr(plt_ax, 'set_title'):
      plt_ax.set_title(title)
    else:
      plt_ax.title(title)
  plt_ax.grid(False)

In [None]:
# Image roots under the `train` and `test` directories.
TRAIN_DIR = Path('train/dataset')
# Was misspelled `TEST_DIT`, which made the lookup below fail with a NameError.
TEST_DIR = Path('test/testset')

# Recursively collect every .jpg under each root, in sorted order.
train_val_files = sorted(TRAIN_DIR.rglob('*.jpg'))
# Was misspelled `test_fiels`.
test_files = sorted(TEST_DIR.rglob('*.jpg'))

NameError: ignored