In [2]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths defined
# in the configuration cell point into this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import csv
import cv2
import numpy as np
import random
import os

from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torchvision.transforms.functional import to_tensor

from collections import OrderedDict

In [4]:
# Dataset locations on the mounted Google Drive.
TRAIN_PATH = "/content/drive/MyDrive/hw5/captcha-hacker/train"
TEST_PATH = "/content/drive/MyDrive/hw5/captcha-hacker/test"
# Prefer the GPU, but fall back to the CPU so that every later `.to(device)`
# call does not crash on a runtime without CUDA. Kept as a plain string so
# existing uses of `device` keep working unchanged.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# Output alphabet: 10 digits followed by 26 lowercase letters (36 symbols).
# `decode` maps a predicted class index i to characters[i], and treats
# characters[0] ('0') as a filler symbol in interior positions for task 3.
characters = '0123456789abcdefghijklmnopqrstuvwxyz'

In [6]:
class taskData(Dataset):
  """Dataset over ``(filename, label)`` rows whose images live under ``root``.

  Indexing returns ``(image_tensor, filename)``. The label column of each row
  is read but not returned; the filename is returned so that predictions can
  be written out next to the file they belong to.
  """

  def __init__(self, data, root):
    """Store the rows and the directory the filenames are relative to.

    Args:
      data: indexable sequence of two-element rows ``(filename, label)``.
      root: directory prefix joined with each filename using ``/``.
    """
    self.root = root
    self.data = data

  def __getitem__(self, index):
    """Load the image for row ``index``.

    Returns:
      ``(img, filename)`` where ``img`` is the result of ``to_tensor`` applied
      to the array returned by ``cv2.imread``.

    Raises:
      IndexError: if ``index`` is out of range for ``data`` (this is also what
        terminates plain ``for`` iteration over the dataset).
      FileNotFoundError: if the image is missing or cannot be decoded.
    """
    filename, _ = self.data[index]
    path = f"{self.root}/{filename}"
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the failure would surface later as an unrelated error inside
    # to_tensor, with no hint about which file was at fault.
    if img is None:
      raise FileNotFoundError(f"could not read image: {path}")
    return to_tensor(img), filename

  def __len__(self):
    """Number of rows in the dataset."""
    return len(self.data)

In [7]:
# Split the rows of the sample submission by task, keyed on the prefix of the
# first column, and wrap each group in a dataset rooted at TEST_PATH.
# Rows whose first column matches none of the prefixes are ignored.
test_data1, test_data2, test_data3 = [], [], []
buckets = (
  ("task1", test_data1),
  ("task2", test_data2),
  ("task3", test_data3),
)

with open(f'{TEST_PATH}/../sample_submission.csv', newline='') as csvfile:
  for row in csv.reader(csvfile, delimiter=','):
    for prefix, bucket in buckets:
      if row[0].startswith(prefix):
        bucket.append(row)
        break  # first matching prefix wins, as in an if/elif chain

ds1, ds2, ds3 = (
  taskData(rows, root=TEST_PATH)
  for rows in (test_data1, test_data2, test_data3)
)

In [8]:
class Model(nn.Module):
  """Convolutional feature extractor followed by a bidirectional LSTM.

  The CNN has five stages; each stage is two (3x3 conv, batch norm, ReLU)
  triples followed by a max-pool. The first four pools halve both spatial
  axes, the last one uses a (2, 1) kernel. The LSTM consumes one step per
  remaining column of the feature map and a linear layer maps each step to
  36 class scores.

  The LSTM's ``input_size=512`` equals channels * height of the CNN output, so
  with 256 output channels the feature map must be 2 rows high.
  Submodule names (``conv11`` ... ``maxpool5``, ``dropout``) and the attribute
  names ``cnn`` / ``lstm`` / ``final`` are part of the saved-model layout.
  """

  def __init__(self):
    super().__init__()
    stage_widths = (32, 64, 128, 256, 256)
    stage_pools = (2, 2, 2, 2, (2, 1))

    layers = []
    in_ch = 3  # the first conv takes a 3-channel image
    for stage, (out_ch, pool) in enumerate(zip(stage_widths, stage_pools), start=1):
      for rep in (1, 2):
        tag = f'{stage}{rep}'
        layers.append((f'conv{tag}', nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=(1, 1))))
        layers.append((f'batchnorm{tag}', nn.BatchNorm2d(out_ch)))
        layers.append((f'relu{tag}', nn.ReLU(inplace=True)))
        in_ch = out_ch
      layers.append((f'maxpool{stage}', nn.MaxPool2d(pool)))
    layers.append(('dropout', nn.Dropout(0.25, inplace=True)))

    self.cnn = nn.Sequential(OrderedDict(layers))
    self.lstm = nn.LSTM(input_size=512, hidden_size=128, num_layers=2, bidirectional=True)
    # 256 = 2 directions * hidden_size 128; 36 output classes.
    self.final = nn.Linear(in_features=256, out_features=36)

  def forward(self, x):
    """Return per-step class scores shaped (width, batch, 36).

    ``x`` is a batch of images (batch, 3, H, W); ``width`` is the last
    dimension of the CNN output.
    """
    feats = self.cnn(x)
    batch, width = feats.shape[0], feats.shape[-1]
    # Fold channels and rows into one feature axis, then put the column axis
    # first because the LSTM is sequence-first (batch_first is not set).
    steps = feats.reshape(batch, -1, width).permute(2, 0, 1)
    steps, _ = self.lstm(steps)
    return self.final(steps)

In [9]:
def decode(sequence, num):
  """Turn a sequence of class indices into the label string for one task.

  Args:
    sequence: iterable of indices into the module-level ``characters`` string.
    num: task number. 1 keeps only the first symbol; 2 keeps the symbols at
      positions 0 and 3; any other value applies the multi-character
      post-processing described below.

  Returns:
    The decoded label string.

  For the multi-character case the first and last symbols are always kept.
  Between them, every occurrence of ``characters[0]`` is dropped; if fewer
  than two symbols survive, a literal '0' is restored at the back (when
  positions 3 and 4 are both '0') and/or at the front (when positions 1 and 2
  are both '0'). If the first symbol plus the survivors is exactly four long,
  it is shortened: truncated to three when its last two symbols are equal,
  otherwise de-duplicated keeping first occurrences.
  """
  text = ''.join(characters[idx] for idx in sequence)

  if num == 1:
    return text[0]
  if num == 2:
    return text[0] + text[3]

  inner = ''.join(ch for ch in text[1:-1] if ch != characters[0])
  if len(inner) < 2 and text[3] == '0' and text[4] == '0':
    inner = inner + '0'
  if len(inner) < 2 and text[1] == '0' and text[2] == '0':
    inner = '0' + inner

  merged = text[0] + inner
  if len(merged) == 4:
    if merged[2] == merged[3]:
      merged = merged[:3]
    else:
      # dict keys keep insertion order, so this keeps first occurrences only
      merged = ''.join(dict.fromkeys(merged))
  return merged + text[-1]

In [15]:
# Copy everything in the Drive project folder into the current working directory.
# The inference cells below load 'model1.pth', 'model2.pth' and 'model3.pth' by
# relative path — presumably those files arrive via this copy, so run this cell
# before them (its execution count shows it was last run out of order).
!cp -r ./drive/MyDrive/hw5/captcha-hacker/* .

In [11]:
# Task 1: load the trained model and start a fresh submission.csv (header plus
# one row per task-1 image).
#
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints you produced yourself. The checkpoint stores a
# whole pickled module, so the `Model` class must already be defined.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True and
# will refuse a pickled module; pass weights_only=False there if needed.
#
# map_location places the weights on `device` regardless of where they were
# saved. (The previous throwaway `Model().to(device)` instance was overwritten
# immediately and has been dropped.)
model1 = torch.load('model1.pth', map_location=device)
model1.eval()

with open('submission.csv', 'w', newline='') as csvfile:
  csv_writer = csv.writer(csvfile, delimiter=',')
  csv_writer.writerow(["filename", "label"])
  # Inference only: disable autograd so no graph is built per image.
  with torch.no_grad():
    for image, filename in ds1:
      output = model1(image.unsqueeze(0).to(device))
      # (steps, batch, classes) -> (batch, steps) of predicted class indices
      outputs = output.permute(1, 0, 2).argmax(dim=-1)
      csv_writer.writerow([filename, decode(outputs[0], 1)])

In [12]:
# Task 2: load the trained model and append one row per task-2 image to the
# existing submission.csv.
#
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints you produced yourself. The checkpoint stores a
# whole pickled module, so the `Model` class must already be defined.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True and
# will refuse a pickled module; pass weights_only=False there if needed.
#
# map_location places the weights on `device` regardless of where they were
# saved. (The previous throwaway `Model().to(device)` instance was overwritten
# immediately and has been dropped.)
model2 = torch.load('model2.pth', map_location=device)
model2.eval()

with open('submission.csv', 'a', newline='') as csvfile:
  csv_writer = csv.writer(csvfile, delimiter=',')
  # Inference only: disable autograd so no graph is built per image.
  with torch.no_grad():
    for image, filename in ds2:
      output = model2(image.unsqueeze(0).to(device))
      # (steps, batch, classes) -> (batch, steps) of predicted class indices
      outputs = output.permute(1, 0, 2).argmax(dim=-1)
      csv_writer.writerow([filename, decode(outputs[0], 2)])

In [13]:
# Task 3: load the trained model and append one row per task-3 image to the
# existing submission.csv.
#
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints you produced yourself. The checkpoint stores a
# whole pickled module, so the `Model` class must already be defined.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True and
# will refuse a pickled module; pass weights_only=False there if needed.
#
# map_location places the weights on `device` regardless of where they were
# saved. (The previous throwaway `Model().to(device)` instance was overwritten
# immediately and has been dropped.)
model3 = torch.load('model3.pth', map_location=device)
model3.eval()

with open('submission.csv', 'a', newline='') as csvfile:
  csv_writer = csv.writer(csvfile, delimiter=',')
  # Inference only: disable autograd so no graph is built per image.
  with torch.no_grad():
    for image, filename in ds3:
      output = model3(image.unsqueeze(0).to(device))
      # (steps, batch, classes) -> (batch, steps) of predicted class indices
      outputs = output.permute(1, 0, 2).argmax(dim=-1)
      csv_writer.writerow([filename, decode(outputs[0], 3)])

In [14]:
# Copy the generated submission.csv into the Drive project folder.
!cp submission.csv ./drive/MyDrive/hw5/captcha-hacker/submission.csv 