In [1]:
import torch
from torch.autograd import Variable
import utils
import dataset
from PIL import Image
import models.crnn as crnn

# --- Configuration ---------------------------------------------------------
model_path = './data/ocr-lstm.pth'
img_path = './data/demo.png'

# The alphanumeric alphabet is only printed for a size comparison; the model
# below is built for the Chinese alphabet exposed by `utils`.
alphabet_en = '0123456789abcdefghijklmnopqrstuvwxyz'
print(len(alphabet_en))
alphabet = utils.alphabetChinese
print(len(utils.alphabetChinese))

# --- Model -----------------------------------------------------------------
# NOTE(review): arguments are presumably (imgH, nc, nclass, nh) and the "+1"
# is presumably the CTC blank class - confirm against models.crnn.
model = crnn.CRNN(32, 1, len(alphabet)+1, 256)

if torch.cuda.is_available():
    model = model.cuda()

print('loading pretrained model from %s' % model_path)
model.load_weights(model_path)

# Inference only: without eval() the BatchNorm layers normalise with the
# statistics of this single image and overwrite their running averages.
model.eval()
converter = utils.strLabelConverter(alphabet)

# --- Input image -----------------------------------------------------------
transformer = dataset.resizeNormalize((100, 32))

image = Image.open(img_path).convert('L')
image = transformer(image)

if torch.cuda.is_available():
    image = image.cuda()

# Add the leading batch dimension expected by the network.
image = image.unsqueeze(0)

# --- Step-by-step breakdown of the forward pass ----------------------------
# no_grad(): no autograd graph is needed just to inspect tensor shapes.
with torch.no_grad():
    feat = image
    print('### CNN + RNN模型输出结果')
    print('- input shape:', feat.shape)
    for layer in model.cnn.children():
        feat = layer(feat)
        print("- ", layer)
        print(': ', feat.shape)

    b, c, h, w = feat.size()
    assert h == 1, "the height of conv must be 1"
    feat = feat.squeeze(2)  # drop the height axis, e.g. [N, 512, 26]
    print("- squeeze(2)")
    print(': ', feat.shape)

    feat = feat.permute(2, 0, 1)  # [w, b, c], e.g. [26, N, 512]
    print("- permute(2, 0, 1)")
    print(': ', feat.shape)

    # Each child of model.rnn exposes a recurrent layer (`rnn`) followed by a
    # projection (`embedding`); run them one at a time to see both shapes.
    for block in model.rnn.children():
        feat, _ = block.rnn(feat)
        print("- ", block.rnn)
        print(': ', feat.shape)

        feat = block.embedding(feat)
        print("- ", block.embedding)
        print(': ', feat.shape)

    preds = feat

# Equivalent one-shot call: preds = model(image)

# Greedy decoding: keep the index of the best-scoring class per time step.
_, preds = preds.max(2)
print('- max(2)', preds.shape)

# Batch size is 1, so flattening [w, 1] directly yields the time sequence.
preds = preds.view(-1)
print('- view(-1)', preds.shape)


print('### 模型结果')
preds_size = torch.IntTensor([preds.size(0)])
raw_pred = converter.decode(preds, preds_size, raw=True)
sim_pred = converter.decode(preds, preds_size, raw=False)
print('- raw_pred size: %d, sim_pred size: %d' %(len(raw_pred), len(sim_pred)))
print('- decode result: %-20s => %-20s' % (raw_pred, sim_pred))


36
5529
loading pretrained model from ./data/ocr-lstm.pth
### CNN + RNN模型输出结果
- input shape: torch.Size([1, 1, 32, 100])
-  Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 64, 32, 100])
-  ReLU(inplace=True)
:  torch.Size([1, 64, 32, 100])
-  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
:  torch.Size([1, 64, 16, 50])
-  Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 128, 16, 50])
-  ReLU(inplace=True)
:  torch.Size([1, 128, 16, 50])
-  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
:  torch.Size([1, 128, 8, 25])
-  Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 256, 8, 25])
-  BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
:  torch.Size([1, 256, 8, 25])
-  ReLU(inplace=True)
:  torch.Size([1, 256, 8, 25])
-  Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.S

### Tensor Shape变化总结
1. cnn input [N, 1, 32, 100] => output [N, 512, 1, 26]  
2. w = input_w/4 + 1 + 1 - 1 = input_w/4 + 1（两次 stride=(2, 1)、padding=(0, 1) 的 MaxPool 各使宽度 +1，最后无 padding 的 2x2 Conv 使宽度 -1）；h = input_h/16 - 1（最后输出必须为 1）
3. 默认input [w(100), h(32)] -> [w(26), h(1)]

### CNN + RNN模型输出结果
- input shape: torch.Size([1, 1, 32, 100])
-  Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 64, 32, 100])
-  ReLU(inplace=True)
:  torch.Size([1, 64, 32, 100])
-  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
:  torch.Size([1, 64, 16, 50])
-  Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 128, 16, 50])
-  ReLU(inplace=True)
:  torch.Size([1, 128, 16, 50])
-  MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
:  torch.Size([1, 128, 8, 25])
-  Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 256, 8, 25])
-  BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
:  torch.Size([1, 256, 8, 25])
-  ReLU(inplace=True)
:  torch.Size([1, 256, 8, 25])
-  Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 256, 8, 25])
-  ReLU(inplace=True)
:  torch.Size([1, 256, 8, 25])
-  MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
:  torch.Size([1, 256, 4, 26])
-  Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 512, 4, 26])
-  BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
:  torch.Size([1, 512, 4, 26])
-  ReLU(inplace=True)
:  torch.Size([1, 512, 4, 26])
-  Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
:  torch.Size([1, 512, 4, 26])
-  ReLU(inplace=True)
:  torch.Size([1, 512, 4, 26])
-  MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
:  torch.Size([1, 512, 2, 27])
-  Conv2d(512, 512, kernel_size=(2, 2), stride=(1, 1))
:  torch.Size([1, 512, 1, 26])
-  BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
:  torch.Size([1, 512, 1, 26])
-  ReLU(inplace=True)
:  torch.Size([1, 512, 1, 26])
- squeeze(2)
:  torch.Size([1, 512, 26])
- permute(2, 0, 1)
:  torch.Size([26, 1, 512])
-  LSTM(512, 256, bidirectional=True)
:  torch.Size([26, 1, 512])
-  Linear(in_features=512, out_features=256, bias=True)
:  torch.Size([26, 1, 256])
-  LSTM(256, 256, bidirectional=True)
:  torch.Size([26, 1, 512])
-  Linear(in_features=512, out_features=37, bias=True)
:  torch.Size([26, 1, 37])

In [17]:
import torch
from torch.autograd import Variable
import utils
import dataset
from PIL import Image
import models.crnn as crnn

# --- Configuration ---------------------------------------------------------
model_path = './data/ocr-lstm.pth'
img_path = './data/demo3.jpeg'
alphabet = utils.alphabetChinese

# --- Model -----------------------------------------------------------------
# NOTE(review): arguments are presumably (imgH, nc, nclass, nh) and the "+1"
# is presumably the CTC blank class - confirm against models.crnn.
model = crnn.CRNN(32, 1, len(alphabet)+1, 256)

if torch.cuda.is_available():
    model = model.cuda()

print('loading pretrained model from %s' % model_path)

model.load_weights(model_path)

# Inference only. This also matters for the export at the end of the cell:
# a forward pass in training mode would update the BatchNorm running
# statistics, so the saved state_dict would differ from the loaded weights.
model.eval()
converter = utils.strLabelConverter(alphabet)

# --- Input image -----------------------------------------------------------
image = Image.open(img_path).convert('L')
print(image.size)

# PIL reports size as (width, height): scale the width so the height becomes
# 32 while keeping the aspect ratio.
img_w = 32 * image.size[0] // image.size[1]

transformer = dataset.resizeNormalize((img_w, 32))
image = transformer(image)

if torch.cuda.is_available():
    image = image.cuda()

# Add the leading batch dimension expected by the network.
image = image.unsqueeze(0)

# --- Forward pass and greedy decoding --------------------------------------
with torch.no_grad():
    preds = model(image)

# Keep the index of the best-scoring class per time step, then flatten
# (batch size is 1).
_, preds = preds.max(2)
preds = preds.view(-1)


print('### 模型结果')
preds_size = torch.IntTensor([preds.size(0)])
raw_pred = converter.decode(preds, preds_size, raw=True)
sim_pred = converter.decode(preds, preds_size, raw=False)
print('- raw_pred size: %d, sim_pred size: %d' %(len(raw_pred), len(sim_pred)))
print('- decode result: %-20s => %-20s' % (raw_pred, sim_pred))

# Re-export the weights as a plain state_dict.
torch.save(model.state_dict(), 'crnn_withlstm.pth')


loading pretrained model from ./data/ocr-lstm.pth
(323, 83)
self.rnn(conv) shape torch.Size([32, 1, 5530])
### 模型结果
- raw_pred size: 32, sim_pred size: 7
- decode result: 实---------现---有----什---么---不---同 => 实现有什么不同             


In [1]:
import torch
from torch.autograd import Variable
import utils
import dataset
from PIL import Image
import models.crnn as crnn

# --- Configuration ---------------------------------------------------------
model_path = './data/crnn_lstm.pth'
img_path = './data/demo3.jpeg'
alphabet = utils.alphabetChinese

# --- Model -----------------------------------------------------------------
# NOTE(review): arguments are presumably (imgH, nc, nclass, nh) and the "+1"
# is presumably the CTC blank class - confirm against models.crnn.
model = crnn.CRNN(32, 1, len(alphabet)+1, 256)

if torch.cuda.is_available():
    model = model.cuda()

print('loading pretrained model from %s' % model_path)

model.load_weights(model_path)

# Inference only: without eval() the BatchNorm layers normalise with the
# statistics of this single image and overwrite their running averages.
model.eval()
converter = utils.strLabelConverter(alphabet)

# --- Input image -----------------------------------------------------------
image = Image.open(img_path).convert('L')
print(image.size)

# PIL reports size as (width, height): scale the width so the height becomes
# 32 while keeping the aspect ratio.
img_w = 32 * image.size[0] // image.size[1]

transformer = dataset.resizeNormalize((img_w, 32))
image = transformer(image)

if torch.cuda.is_available():
    image = image.cuda()

# Add the leading batch dimension expected by the network.
image = image.unsqueeze(0)

# --- Forward pass and greedy decoding --------------------------------------
with torch.no_grad():
    preds = model(image)

# Keep the index of the best-scoring class per time step, then flatten
# (batch size is 1).
_, preds = preds.max(2)
preds = preds.view(-1)


print('### 模型结果')
preds_size = torch.IntTensor([preds.size(0)])
raw_pred = converter.decode(preds, preds_size, raw=True)
sim_pred = converter.decode(preds, preds_size, raw=False)
print('- raw_pred size: %d, sim_pred size: %d' %(len(raw_pred), len(sim_pred)))
print('- decode result: %-20s => %-20s' % (raw_pred, sim_pred))

# To re-export the weights: torch.save(model.state_dict(), 'crnn_withlstm.pth')


loading pretrained model from ./data/crnn_lstm.pth
(323, 83)
self.rnn(conv) shape torch.Size([32, 1, 5530])
### 模型结果
- raw_pred size: 32, sim_pred size: 7
- decode result: 实---------现---有----什---么---不---同 => 实现有什么不同             


In [17]:
import cv2
import numpy as np
import torch
from PIL import Image

import models.crnn as crnn

# --- Configuration ---------------------------------------------------------
model_path = './data/crnn_lstm.pth'   # alternative: './data/netCRNN.pth'
img_path = './data/demo.png'          # alternative: './data/demo5.jpeg'

# --- Model -----------------------------------------------------------------
model = crnn.CRNN()
print('loading pretrained model from %s' % model_path)
model.load_weights(model_path)
# NOTE(review): predict() is assumed to handle eval mode / device placement
# itself - confirm in models.crnn.

# --- Input image -----------------------------------------------------------
# cv2.imread reports an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
bgr = cv2.imread(img_path)
if bgr is None:
    raise FileNotFoundError('cannot read image: %s' % img_path)

# Reverse the channel order (OpenCV loads BGR, PIL expects RGB).
image = bgr[:, :, (2, 1, 0)]
print("image.shape:",image.shape)

# Swap the row and column axes.
image = image.transpose(1,0,2)
print("image.T.shape:",image.shape)

# NOTE(review): a transpose followed by a 90-degree counter-clockwise rotation
# looks equivalent to flipping the image top-to-bottom - confirm this
# orientation experiment is intended before trusting the decoded text.
image = Image.fromarray(image).convert('L')
image = image.rotate(90, expand = 1)
print('size:',image.size)

# Width the image would have once its height is scaled to 32 (printed only).
img_w = 32 * image.size[0] // image.size[1]

print(image.size, 32, img_w)
preds, raw_pred,sim_pred = model.predict(image)


print('### 模型结果')
print('- raw_pred size: %d, sim_pred size: %d' %(len(raw_pred), len(sim_pred)))
print('- decode result: %-20s => %-20s' % (raw_pred, sim_pred))


loading pretrained model from ./data/crnn_lstm.pth
image.shape: (72, 184, 3)
image.T.shape: (184, 72, 3)
size: (184, 72)
(184, 72) 32 81
### 模型结果
- raw_pred size: 26, sim_pred size: 5
- decode result: v-------a-------p--p----e- => vappe               


In [1]:
import torch
from PIL import Image
import models.crnn as crnn
import cv2

# --- Configuration ---------------------------------------------------------
# Only one checkpoint is loaded; './data/crnn_dense.pth' was the previous
# candidate and used to be assigned (then overwritten) here.
model_path = './data/netCRNN_True.pth'
img_path = './data/demo6.jpeg'        # alternative: './data/demo.png'

# --- Model -----------------------------------------------------------------
model = crnn.CRNN(lstmFlag=True)
print('loading pretrained model from %s' % model_path)
model.load_weights(model_path)
# NOTE(review): predict() is assumed to handle eval mode / device placement
# itself - confirm in models.crnn.

# --- Input image -----------------------------------------------------------
# cv2.imread reports an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
bgr = cv2.imread(img_path)
if bgr is None:
    raise FileNotFoundError('cannot read image: %s' % img_path)

# Reverse the channel order (BGR -> RGB) before converting to grayscale.
image = Image.fromarray(bgr[:, :, (2, 1, 0)]).convert('L')
print(image.size)

# --- Prediction ------------------------------------------------------------
preds, raw_pred,sim_pred = model.predict(image)


print('### 模型结果')
print('- raw_pred size: %d, sim_pred size: %d' %(len(raw_pred), len(sim_pred)))
print('- decode result: %-20s => %-20s' % (raw_pred, sim_pred))


loading pretrained model from ./data/netCRNN_True.pth
(63, 446)
### 模型结果
- raw_pred size: 26, sim_pred size: 7
- decode result: 胡-------思--乱---想--的--后---果 => 胡思乱想的后果             
