In [3]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch.autograd import Variable
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from tqdm import trange
from PIL import Image

%matplotlib inline
custom_style = {'axes.labelcolor': 'white',
                'xtick.color': 'white',
                'ytick.color': 'white'}
sns.set_style("darkgrid", rc=custom_style)
sns.set_context("notebook")
plt.style.use('dark_background')
plt.rcParams["font.size"] = 18

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms
from torchvision.datasets import MNIST
from torch.utils.data import Dataset


# PytorchでMNISTを学習

### 参考サイト
https://github.com/pytorch/examples/blob/master/mnist/main.py

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        #畳み込み層を定義する
        #引数は順番に、サンプル数、チャネル数、フィルタのサイズ
        self.conv1 = nn.Conv2d(1, 10, kernel_size=3)
        #フィルタのサイズは正方形であればタプルではなく整数でも可（8行目と10行目は同じ意味）
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
        #全結合層を定義する
        #fc1の第一引数は、チャネル数*最後のプーリング層の出力のマップのサイズ=特徴量の数
        
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(500, 500)
        self.fc2 = nn.Linear(500, 500)
        self.fc3 = nn.Linear(500, 10)
        
    def forward(self, x):
        #入力→畳み込み層1→活性化関数(ReLU)→プーリング層1(2*2)→出力
        # input 28 x 28 x 1
        # conv1 28 x 28 x 1 -> 24 x 24 x 10
        # max_pool(kernel2) 12 x 12 x 10
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2,2) )
        
        #入力→畳み込み層2→活性化関数(ReLU)→プーリング層2(2*2)→出力
        # conv2 12 x 12 x 10 -> 8 x 8 x 20
        # max_pool(kernel2) -> 4 x 4 x 20
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        
        x = self.conv2_drop(x)
        # output layer
        #x = x.view(-1, self.num_flat_features(x))
        # self.num_flat_featuresで特徴量の数を算出
        # flatten 4 x 4 x 20 = 320
        x = x.view(-1, self.num_flat_features(x))
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.log_softmax(x, dim=1)
        
        return x
    
    def num_flat_features(self, x):
        #Conv2dは入力を4階のテンソルとして保持する(サンプル数*チャネル数*縦の長さ*横の長さ)
        #よって、特徴量の数を数える時は[1:]でスライスしたものを用いる
        size = x.size()[1:] ## all dimensions except the batch dimension
        #特徴量の数=チャネル数*縦の長さ*横の長さを計算する
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


In [4]:
class TrainLogger(object):
    
    def __init__(self, out):
        try:
            os.makedirs(out)
        except OSError:
            pass
        self.file = open(os.path.join(out, 'log'), 'w')
        self.logs = []
        
    def write(self, log):
        ## write log
        tqdm.write(log)
        tqdm.write(log, file=self.file)
        self.logs.append(log)
        
    def state_dict(self):
        ## returns the state of the loggers
        return {'logs': self.logs}
    
    def load_state_dict(self, state_dict):
        ## load the logger state
        self.logs = state_dict['logs']
        #write logs
        tqdm.write(self.logs[-1])
        for log in self.logs:
            tqdm.write(log, file=self.file)
            

In [5]:
def checkpoint(net, optimizer, epoch, logger, out):
    filename = os.path.join(out, 'epoch-{}'.format(epoch))
    torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
    torch.save(net.state_dict(), filename + 'model')
    torch.save(optimizer.state_dict(), filename + 'state')

In [6]:
def train(model, device, train_loader, criterion, optimizer, epoch, log_interval, logger):
    model.train()
    for batch_id, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_id % log_interval == 0:
            log = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_id * len(data), len(train_loader.dataset),
                100. * batch_id / len(train_loader), loss.item())
            logger.write(log)

In [7]:
def test(model, device, test_loader, criterion, logger):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    log = '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, total, 100. * correct / total)
    logger.write(log)

In [8]:
class MNISTDataSet(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, csv_file, root_dir, transform=None):

        self.image_dataframe = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.image_dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,
                                self.image_dataframe.loc[idx, 'img'])
#         image = io.imread(img_name)
        image = Image.open(img_name)
        image = image.convert('L')
        label = self.image_dataframe.loc[idx, 'label']

        if self.transform:
            image = self.transform(image)

        return image, label

In [9]:
def main():
    batch_size = 100
    test_bach_size = 100
    epochs = 30
    lr = 0.01
    momentum = 0.5
    no_cuda = False
    seed = 123
    log_interval = 10
    out_dir = './result'
    train_csv = '../data/mnist/train_big.csv'
    test_csv = '../data/mnist/test_big.csv'
    train_root_dir = '../data/mnist/train'
    test_root_dir = '../data/mnist/test'
    test_interval = 1
    resume_interval = 1
    
    use_cuda = not no_cuda and torch.cuda.is_available()    
    torch.manual_seed(seed)
    device = torch.device('cuda:1' if use_cuda else 'cpu')
    print(device)
    kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {}
    
    transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5,))])
    
#     trainset = datasets.MNIST(
#         root = './data', train=True, download=True,transform=transform
#     )
    trainset = MNISTDataSet(train_csv, train_root_dir, transform)
#     trainset = datasets.ImageFolder(train_root_dir, transform)
    
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, **kwargs
    )
#     testset = datasets.MNIST(
#         root = './data', train=False, download=True,transform=transform
#     )
    testset = MNISTDataSet(test_csv, test_root_dir, transform)
#     testset = datasets.ImageFolder(test_root_dir, transform)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=test_bach_size, shuffle=False, **kwargs
    )
    
    net = Net().to(device)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    logger = TrainLogger(out_dir)
    
    for epoch in range(1, epochs + 1):
        train(net, device, trainloader, criterion, optimizer, epoch, log_interval, logger)
        if epoch % test_interval == 0:
            test(net, device, testloader, criterion, logger)
        if epoch % resume_interval == 0:
            checkpoint(net, optimizer, epoch, logger, out_dir)

In [36]:
transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5,))])
trainset = datasets.MNIST(root = '../data', train=True, download=True,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=10, shuffle=True)

In [37]:
it = iter(trainloader)

In [38]:
img, label = it.next()

In [42]:
a = np.arange(25).reshape(5,5)
a = torch.Tensor(a)

RuntimeError: invalid argument 1: only one dimension can be inferred at /pytorch/aten/src/TH/THStorage.cpp:71

In [47]:
b.size()

torch.Size([1, 5, 5])

In [50]:
m = 10
np.arange(0, m+1,m // 10)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [124]:
from tqdm import tqdm
from tqdm import trange
from time import sleep

In [125]:
s = [s for s in range(10)]
for i in tqdm(s, desc='1st loop'):
    for j in trange(5, desc='2nd loop'):
        sleep(0.5)

1st loop:   0%|          | 0/10 [00:00<?, ?it/s]
2nd loop:   0%|          | 0/5 [00:00<?, ?it/s][A
2nd loop:  20%|██        | 1/5 [00:00<00:02,  1.99it/s][A
2nd loop:  40%|████      | 2/5 [00:01<00:01,  1.99it/s][A
2nd loop:  60%|██████    | 3/5 [00:01<00:01,  1.98it/s][A
2nd loop:  80%|████████  | 4/5 [00:02<00:00,  1.98it/s][A
2nd loop: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s][A
1st loop:  10%|█         | 1/10 [00:02<00:22,  2.53s/it]
2nd loop:   0%|          | 0/5 [00:00<?, ?it/s][A
2nd loop:  20%|██        | 1/5 [00:00<00:02,  1.99it/s][A
2nd loop:  40%|████      | 2/5 [00:01<00:01,  1.99it/s][A
2nd loop:  60%|██████    | 3/5 [00:01<00:01,  1.99it/s][A
2nd loop:  80%|████████  | 4/5 [00:02<00:00,  1.98it/s][A
2nd loop: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s][A
1st loop:  20%|██        | 2/10 [00:05<00:20,  2.53s/it]
2nd loop:   0%|          | 0/5 [00:00<?, ?it/s][A
2nd loop:  20%|██        | 1/5 [00:00<00:02,  1.99it/s][A
2nd loop:  40%|████      | 2/5 [00:

In [48]:
from time import sleep

In [62]:
tqdm.write('TEST')
for j in range(3):
#     print('{}回目'.format(j))
    for i in tqdm(range(3)):
        sleep(0.5)

        

  0%|          | 0/3 [00:00<?, ?it/s]

TEST


100%|██████████| 3/3 [00:01<00:00,  1.99it/s]
100%|██████████| 3/3 [00:01<00:00,  1.99it/s]
100%|██████████| 3/3 [00:01<00:00,  1.99it/s]


In [68]:
tqdm.write('HELLO')
t = trange(100, desc='Bar desc', leave=True)
for i in t:
    t.set_description("Bar desc (file %i)" % i)
    t.refresh() # to show immediately the update
    sleep(0.01)
print('HELLO')

Bar desc (file 12):   7%|▋         | 7/100 [00:00<00:02, 37.65it/s]

HELLO


Bar desc (file 99): 100%|██████████| 100/100 [00:01<00:00, 63.73it/s]

HELLO





In [71]:
import sys
import time
 
def progress(p, l):
    sys.stdout.write("\r%d / 100" %(int(p * 100 / (l - 1))))
    sys.stdout.flush()
     
length = 100
 
for i in range(length):
    progress(i, length)
     
    #重い処理のはじまり
    time.sleep(0.01)
    #重い処理の終わり
print('\nHELLO')

100 / 100
HELLO


In [76]:
a = [1,2,3,4]
for i,d in enumerate(a, start=1):
    print(i)

1
2
3
4


In [79]:
tqdm.write('test')
print('HELLO')
tqdm.write('test')

test
HELLO
test


In [80]:
pprint('TEST')

Pretty printing has been turned OFF


In [4]:

'hello'

'hello'