In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
import pandas as pd
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import

In [3]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [4]:
from tqdm.notebook import tqdm
from PIL import Image
import os

In [5]:
## Parameters
### Number of consecutive images grouped into one sequence (five images per group)
step = 5

In [6]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

### read images

In [7]:
# NOTE(review): machine-specific absolute path; point this at the local copy
# of the clipped samples before running on another machine.
path = r'C:\Users\liuya\Downloads\3d_printing_research\clipped_samples'
# os.listdir() returns entries in arbitrary order. The sliding windows built
# further down treat neighbouring list entries as consecutive frames, so the
# listing is sorted to make the frame order deterministic across platforms.
# Assumes the file names sort in acquisition order (e.g. zero-padded
# indices) -- TODO confirm against the actual file names.
image_list = sorted(os.listdir(path))

In [8]:
# Sanity check: open one sample (index 350) as RGB and display its
# (height, width, channels) shape.
full_path = os.path.join(path, image_list[350])
np.asarray(Image.open(full_path).convert('RGB')).shape

(250, 730, 3)

In [9]:
# Load every listed file as a single-channel ('L' mode) image and keep the
# pixel data as one 2-D NumPy array per image, in the order of image_list.
image_ls = []

for file_name in tqdm(image_list):
    # The context manager closes the underlying file handle as soon as the
    # pixels have been converted, instead of leaving that to garbage
    # collection while thousands of files are being read.
    with Image.open(os.path.join(path, file_name)) as img:
        image_ls.append(np.asarray(img.convert('L')))

  0%|          | 0/4046 [00:00<?, ?it/s]

In [10]:
def sliding_window(datas,steps=1,width=step):
    """Cut a sequence of 2-D arrays into fixed-length windows.

    Parameters
    ----------
    datas : sequence of equally shaped 2-D arrays (one per image).
    steps : stride between the start positions of successive windows.
    width : number of consecutive items per window; defaults to the
        module-level ``step`` at definition time.

    Returns
    -------
    np.ndarray of shape (n_windows, 1, width, H, W). Windows that would run
    past the end of ``datas`` (fewer than ``width`` items) are dropped.
    """
    windows = []
    for start in tqdm(np.arange(0, len(datas), steps)):
        chunk = datas[start:start + width]
        if len(chunk) != width:
            # Incomplete tail window: skip it.
            continue
        # Stack to (width, H, W) and prepend a singleton channel axis.
        windows.append(np.array(chunk)[None, :, :, :])
    return np.array(windows)

In [11]:
# Build stride-1 windows of `step` images and convert them straight to a
# float32 tensor, so `data_input` is only ever bound to the tensor.
data_input = torch.tensor(
    sliding_window(image_ls, steps=1, width=step),
    dtype=torch.float32,
)

  0%|          | 0/4046 [00:00<?, ?it/s]

In [12]:
## 5-D layout (N, C, D, H, W): batch, channels=1 (greyscale), number of images per sequence, height, width
## (a 4-D variant would be (N, C, D, W))
data_input.shape

torch.Size([4042, 1, 5, 250, 730])

In [13]:
# Preview the shape the CNN encoder sees after folding the sequence axis into
# the batch axis: (N * D, 1, H, W).
data_input.view(-1, 1, data_input.size(3), data_input.size(4)).shape

torch.Size([20210, 1, 250, 730])

### build the model

In [14]:
class Conv_NN_encoder(nn.Module):
    """Per-frame convolutional encoder.

    Every frame of every sequence is passed through two conv + ReLU + max-pool
    stages and a linear layer, giving one 128-dimensional embedding per frame.
    The max-pool indices are returned as well so that a decoder can undo the
    pooling with ``nn.MaxUnpool2d``.
    """

    def __init__(self):
        super().__init__()
        kernel = (10, 30)
        pool = (3, 9)
        self.conv2d1 = nn.Conv2d(1, 16, kernel_size=kernel)
        self.maxpool2d1 = nn.MaxPool2d(pool, return_indices=True)
        self.conv2d2 = nn.Conv2d(16, 8, kernel_size=kernel)
        self.maxpool2d2 = nn.MaxPool2d(pool, return_indices=True)
        self.flatten = nn.Flatten()
        # 920 = 8 channels * 23 * 5, the flattened feature-map size that a
        # 250 x 730 input frame produces after the two conv/pool stages.
        self.linear1 = nn.Linear(in_features=920, out_features=128)

    def forward(self, x):
        """Encode ``x`` of shape (N, 1, D, H, W).

        Returns
        -------
        (embeddings, indices1, indices2) where ``embeddings`` has shape
        (N, D, 128) and ``indices1`` / ``indices2`` are the indices produced
        by the first / second max-pool layer.
        """
        n_samples, n_tau = x.size(0), x.size(2)
        # Fold the sequence axis into the batch axis so each frame becomes an
        # independent single-channel image: (N * D, 1, H, W).
        frames = x.view(-1, 1, x.size(3), x.size(4))
        feat, indices1 = self.maxpool2d1(F.relu(self.conv2d1(frames)))
        feat, indices2 = self.maxpool2d2(F.relu(self.conv2d2(feat)))
        embedding = self.linear1(self.flatten(feat))
        # Restore the (sample, time step) layout.
        return embedding.view(n_samples, n_tau, -1), indices1, indices2

In [15]:
class Lstm_encoder(nn.Module):
    """Two stacked LSTMs (128 -> 32 -> 16) that compress a sequence of frame
    embeddings into one latent vector, repeated once per time step."""

    def __init__(self):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size=128, hidden_size=32)
        self.lstm2 = nn.LSTM(input_size=32, hidden_size=16)

    def forward(self, x):
        """Encode ``x`` of shape (batch, seq_len, 128).

        Returns a tensor of shape (seq_len, batch, 16): the final hidden state
        of the second LSTM, tiled along the time axis.
        """
        # The LSTMs are built with the default sequence-first layout, so move
        # the time axis to the front.
        seq_first = x.permute(1, 0, 2)
        seq_features, _ = self.lstm1(seq_first)
        seq_features, (last_hidden, _) = self.lstm2(seq_features)
        # `seq_features` covers every time step; `last_hidden` only the last
        # one. Repeat it so there is one copy per time step.
        n_steps = seq_features.shape[0]
        return last_hidden.repeat(n_steps, 1, 1)

In [16]:
class Lstm_decoder(nn.Module):
    """Two stacked LSTMs (16 -> 32 -> 128) that expand the repeated latent
    vector back into one 128-dimensional embedding per time step."""

    def __init__(self):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size=16, hidden_size=32)
        self.lstm2 = nn.LSTM(input_size=32, hidden_size=128)

    def forward(self, x):
        """Decode ``x`` of shape (seq_len, batch, 16) into (batch, seq_len, 128)."""
        # The input is already sequence-first, so no reshaping is needed here.
        seq, _ = self.lstm1(x)
        seq, _ = self.lstm2(seq)
        # Back to batch-first for the convolutional decoder.
        return seq.permute(1, 0, 2)

In [17]:
class Conv_NN_decoder(nn.Module):
    """Per-frame convolutional decoder.

    Expands each 128-dimensional frame embedding back to an image by undoing
    the two pool/conv stages of the encoder: linear -> max-unpool -> padded
    conv -> max-unpool -> padded conv.

    The un-pooling target sizes (71 x 48 and 241 x 701) are fixed, so the
    pooling indices must come from an encoder pass over 250 x 730 frames.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(128, 920)
        self.maxunpool2d1 = nn.MaxUnpool2d((3,9))
        # padding = kernel_size - 1 makes each conv grow the feature map by
        # exactly what the matching un-padded encoder conv removed.
        self.conv2d1 = nn.Conv2d(8, 16, kernel_size=(10,30), padding=(9,29))
        self.maxunpool2d2 = nn.MaxUnpool2d((3,9))
        self.conv2d2 = nn.Conv2d(16, 1, kernel_size=(10,30), padding=(9,29))

    def forward(self, x, indices1, indices2):
        """Decode frame embeddings back into image sequences.

        Parameters
        ----------
        x : tensor of shape (batch, seq_len, 128).
        indices1 : indices from the encoder's first max-pool layer.
        indices2 : indices from the encoder's second max-pool layer.

        Returns
        -------
        Tensor of shape (batch, 1, seq_len, H, W).
        """
        # Read the sequence length from the input instead of assuming 5, so
        # the decoder keeps working when the window width changes.
        n_tau = x.shape[1]
        # One row per frame: (batch * seq_len, 128).
        x = x.contiguous().view((-1, x.shape[2]))
        output = self.linear1(x)
        # 920 = 8 channels * 23 * 5. The trailing 5 is the width of the pooled
        # feature map, not the sequence length.
        output = output.view((output.shape[0], 8, -1, 5))
        output = self.maxunpool2d1(output, indices2, output_size=torch.Size([output.shape[0], 8, 71, 48]))
        output = self.conv2d1(output)
        output = self.maxunpool2d2(output, indices1, output_size=torch.Size([output.shape[0], 16, 241, 701]))
        output = self.conv2d2(output)
        # Split the folded batch axis back into (batch, 1, seq_len, H, W).
        output = output.view((-1, 1, n_tau, output.shape[2], output.shape[3]))
        return output

In [18]:
class net(nn.Module):
    """Sequence autoencoder assembled from four sub-modules.

    The sub-modules are passed positionally, in this order: convolutional
    encoder, LSTM encoder, LSTM decoder, convolutional decoder.
    """

    def __init__(self, *args):
        super().__init__()
        stage_names = (
            "Conv_NN_encoder",
            "Lstm_encoder",
            "Lstm_decoder",
            "Conv_NN_decoder",
        )
        # Register the stages in pipeline order under their fixed names.
        for position, stage_name in enumerate(stage_names):
            setattr(self, stage_name, args[position])

    def forward(self, x):
        """Run ``x`` through encoder and decoder and return the reconstruction."""
        encoded, pool_idx1, pool_idx2 = self.Conv_NN_encoder(x)
        latent = self.Lstm_encoder(encoded)
        decoded_seq = self.Lstm_decoder(latent)
        # The convolutional decoder needs the encoder's pooling indices.
        return self.Conv_NN_decoder(decoded_seq, pool_idx1, pool_idx2)

In [19]:
# Assemble the autoencoder: CNN encoder -> LSTM encoder -> LSTM decoder -> CNN decoder.
model = net(
    Conv_NN_encoder(),
    Lstm_encoder(),
    Lstm_decoder(),
    Conv_NN_decoder(),
)

### train the model

In [20]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch on a reconstruction objective.

    Parameters
    ----------
    model : module whose output has the same shape as its input.
    device : device each batch is moved to before the forward pass.
    train_loader : iterable yielding input batches (batch axis first).
    optimizer : optimizer that updates ``model``'s parameters.
    epoch : epoch number, used only in the progress message.

    The loss of a batch is the squared error summed over all elements and
    divided by the number of samples in the batch, i.e. the mean per-sample
    summed squared error. This is the same scale that ``test`` reports.
    """
    model.train()  # training mode
    for batch_idx, data in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)

        # One vectorised call replaces the per-sample Python loop; the sum of
        # per-sample sums equals the sum over the whole batch.
        loss = F.mse_loss(output, data, reduction='sum') / data.shape[0]
        loss.backward()
        optimizer.step()
        # Report progress every 10 batches.
        if batch_idx % 10 == 0:
            print('Train Epoch: {} ... Batch: {} ... Loss: {:.8f}'.format(epoch, batch_idx, loss.item()))

In [21]:
def test(model, device, test_loader):
    model.eval() #evaluate model
    test_loss = 0
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            output = model(data)
            #calculate sum loss
            test_loss += F.mse_loss(output, data, reduction='sum').item()
    
        test_loss /= len(test_loader.dataset)
        print('------------------- Test set: Average loss: {:.4f} ... Samples: {}'.format(test_loss, len(test_loader.dataset)))

### train test split

In [22]:
# Random 80/20 split of the windows with a fixed seed.
# NOTE(review): the windows are built with stride 1, so neighbouring windows
# share frames; a random split puts near-duplicates on both sides. Consider a
# chronological split if an independent validation score is needed.
train_window_, val_window_ = train_test_split(
    data_input,
    test_size=0.2,
    random_state=2022,
)

In [23]:
# Batches of 16 windows; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    dataset=train_window_, batch_size=16, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=val_window_, batch_size=16, shuffle=False
)

In [24]:
# Same device selection as the earlier cell, repeated so this training section
# can be run on its own.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [25]:
# Move all model parameters to the selected device.
model = model.to(device=device)

In [26]:
# Adam over all model parameters with a learning rate of 0.05.
optimizer = optim.Adam(params=model.parameters(), lr=0.05)

In [27]:
# Number of full passes over the training loader.
epochs = 5

In [28]:
# One training pass followed by one validation pass per epoch (1-based numbering).
for epoch in range(1, epochs + 1):
    train(
        model=model,
        device=device,
        train_loader=train_loader,
        optimizer=optimizer,
        epoch=epoch,
    )
    test(model=model, device=device, test_loader=test_loader)

Train Epoch: 1 ... Batch: 0 ... Loss: 20522881024.00000000
Train Epoch: 1 ... Batch: 10 ... Loss: 4597186048.00000000
Train Epoch: 1 ... Batch: 20 ... Loss: 3296682240.00000000
Train Epoch: 1 ... Batch: 30 ... Loss: 2894288640.00000000
Train Epoch: 1 ... Batch: 40 ... Loss: 2741480448.00000000
Train Epoch: 1 ... Batch: 50 ... Loss: 2194238208.00000000
Train Epoch: 1 ... Batch: 60 ... Loss: 2363062784.00000000
Train Epoch: 1 ... Batch: 70 ... Loss: 2299691264.00000000
Train Epoch: 1 ... Batch: 80 ... Loss: 2150546432.00000000
Train Epoch: 1 ... Batch: 90 ... Loss: 2253511168.00000000
Train Epoch: 1 ... Batch: 100 ... Loss: 2203320064.00000000
Train Epoch: 1 ... Batch: 110 ... Loss: 2035593216.00000000
Train Epoch: 1 ... Batch: 120 ... Loss: 2289164032.00000000
Train Epoch: 1 ... Batch: 130 ... Loss: 2113835264.00000000
Train Epoch: 1 ... Batch: 140 ... Loss: 2096688512.00000000
Train Epoch: 1 ... Batch: 150 ... Loss: 2196679936.00000000
Train Epoch: 1 ... Batch: 160 ... Loss: 2001728384

In [None]:
#### Minimum loss observed for each learning rate tried:
#### lr=0.5 min_loss=3846104064
#### lr=0.1 min_loss=1067529920
#### lr=0.05 min_loss=1028780992