编码器块

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation # type: ignore
from tensorflow.keras.models import Model# type: ignore

def encoder_block(inputs, num_filters):
    # num_filters: 卷积核的数量，也就是输出特征图的深度。
    # 3: 卷积核的大小为3x3。
    # padding='valid': 无填充，卷积操作后输出特征图的大小会减小。
    x = Conv2D(num_filters, 3, padding='valid')(inputs)
    x = Activation('relu')(x)
    x = Conv2D(num_filters, 3, padding='valid')(x)
    x = Activation('relu')(x)
    # pool_size=(2, 2): 池化窗口的大小为2x2。
    # strides=2: 池化步幅为2，即池化窗口每次移动2个单位。
    x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    return x


解码器块
- 目的是通过上采样和卷积操作逐渐恢复图像的空间分辨率，同时利用跳跃连接融合来自编码器路径的特征。
- 转置卷积操作增加特征图的空间分辨率，并利用跳跃连接融合来自编码器路径的特征信息。然后，通过两次卷积和激活操作进一步处理这些特征图。这个过程帮助模型逐层恢复图像的空间尺寸，同时保留并整合编码路径中的关键信息。

In [None]:
from tensorflow.keras.layers import Conv2DTranspose, Concatenate # type: ignore

def decoder_block(inputs, skip_features, num_filters):
    #对输入特征图进行转置卷积（反卷积）操作，增加空间分辨率
    #num_filters: 转置卷积核的数量，即输出特征图的深度。
	# （2, 2): 转置卷积核的大小为2x2。
	# strides=2: 转置卷积步幅为2，即每次移动2个单位。
	# padding='valid': 无填充，输出特征图的大小会减小。
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding='valid')(inputs)
    
    # 作用: 调整跳跃连接特征图的尺寸，以匹配当前转置卷积层输出特征图的空间尺寸。
	# size: 目标尺寸，即转置卷积后特征图的高度和宽度。
	# 输入: 编码器路径中相应层的输出特征图。
	# 输出: 尺寸调整后的特征图。
    skip_features = tf.image.resize(skip_features, size=(x.shape[1], x.shape[2]))
    
    # 作用: 将上一步的转置卷积输出和调整尺寸后的跳跃连接特征图在深度维度上进行连接。
	# 输入: 转置卷积后的特征图和调整尺寸后的跳跃连接特征图。
	# 输出: 连接后的特征图，深度是两个输入特征图深度的总和。
    x = Concatenate()([x, skip_features])
    #对连接后的特征图进行二维卷积操作。
    x = Conv2D(num_filters, 3, padding='valid')(x)
    x = Activation('relu')(x)
    x = Conv2D(num_filters, 3, padding='valid')(x)
    x = Activation('relu')(x)
    return x

Unet模型

In [None]:
	# 作用: 定义模型的输入形状。
	# input_shape=(256, 256, 3): 输入图像的形状为256x256，3个通道（RGB图像）。
	# num_classes=1: 输出的类别数量，用于二分类分割任务。
	# 输入: 输入图像。
	# 输出: 输入层张量。
def unet_model(input_shape=(256, 256, 3), num_classes=1):
    inputs = Input(input_shape)
    
    # 收缩路径 逐步减少空间分辨率并提取高层次特征
    s1 = encoder_block(inputs, 64)
    s2 = encoder_block(s1, 128)
    s3 = encoder_block(s2, 256)
    s4 = encoder_block(s3, 512)
    
    # 瓶颈层 在编码路径和解码路径之间起到连接作用，进一步提取高级特征
    b1 = Conv2D(1024, 3, padding='valid')(s4)
    b1 = Activation('relu')(b1)
    b1 = Conv2D(1024, 3, padding='valid')(b1)
    b1 = Activation('relu')(b1)
    
    # 扩展路径 逐步恢复空间分辨率，同时利用跳跃连接融合编码路径中的特征
    s5 = decoder_block(b1, s4, 512)
    s6 = decoder_block(s5, s3, 256)
    s7 = decoder_block(s6, s2, 128)
    s8 = decoder_block(s7, s1, 64)
    
    # 输出层
    outputs = Conv2D(num_classes, 1, padding='valid', activation='sigmoid')(s8)
    
    model = Model(inputs, outputs, name='U-Net')
    return model

# if __name__ == '__main__':
#     model = unet_model(input_shape=(572, 572, 3), num_classes=2)
#     model.summary()


应用

In [11]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, Conv2DTranspose, Concatenate, Layer
from tensorflow.keras.models import Model

class ResizeLayer(Layer):
    def call(self, inputs, size):
        return tf.image.resize(inputs, size=size)

def encoder_block(inputs, num_filters):
    x = Conv2D(num_filters, 3, padding='valid')(inputs)
    x = Activation('relu')(x)
    x = Conv2D(num_filters, 3, padding='valid')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    return x

def decoder_block(inputs, skip_features, num_filters):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding='valid')(inputs)
    skip_features = ResizeLayer()(skip_features, size=(x.shape[1], x.shape[2]))
    x = Concatenate()([x, skip_features])
    x = Conv2D(num_filters, 3, padding='valid')(x)
    x = Activation('relu')(x)
    x = Conv2D(num_filters, 3, padding='valid')(x)
    x = Activation('relu')(x)
    return x

def unet_model(input_shape=(256, 256, 3), num_classes=1):
    inputs = Input(input_shape)
    
    # 收缩路径
    s1 = encoder_block(inputs, 64)
    s2 = encoder_block(s1, 128)
    s3 = encoder_block(s2, 256)
    s4 = encoder_block(s3, 512)
    
    # 瓶颈层
    b1 = Conv2D(1024, 3, padding='valid')(s4)
    b1 = Activation('relu')(b1)
    b1 = Conv2D(1024, 3, padding='valid')(b1)
    b1 = Activation('relu')(b1)
    
    # 扩展路径
    s5 = decoder_block(b1, s4, 512)
    s6 = decoder_block(s5, s3, 256)
    s7 = decoder_block(s6, s2, 128)
    s8 = decoder_block(s7, s1, 64)
    
    # 输出层
    outputs = Conv2D(num_classes, 1, padding='valid', activation='sigmoid')(s8)
    
    model = Model(inputs, outputs, name='U-Net')
    return model

if __name__ == '__main__':
    model = unet_model(input_shape=(572, 572, 3), num_classes=2)
    model.summary()

In [15]:
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing import image

# 加载图像
img = Image.open('./Cat from U-Net.png')
img = Image.open('./DATA/cats/cat.265.jpg')
# 预处理图像
img = img.resize((572, 572))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array[:,:,:3], axis=0)
img_array = img_array / 255.

# 加载模型
model = unet_model(input_shape=(572, 572, 3), num_classes=2)

# 进行预测
predictions = model.predict(img_array)

# 将预测转换为numpy数组并调整为原始图像尺寸
predictions = np.squeeze(predictions, axis=0)
predictions = np.argmax(predictions, axis=-1)
predictions = Image.fromarray(np.uint8(predictions * 255))
predictions = predictions.resize((img.width, img.height))

# 保存预测图像
predictions.save('predicted_image.jpg')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498ms/step


In [None]:
import tensorflow as tf

def encoder_block(inputs, num_filters):
    x = tf.keras.layers.Conv2D(num_filters, 3, padding='same')(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.Conv2D(num_filters, 3, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    p = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
    return p, x

def decoder_block(inputs, skip_features, num_filters):
    x = tf.keras.layers.Conv2DTranspose(num_filters, (2, 2), strides=2, padding='same')(inputs)
    
    # Crop skip features if needed
    x_shape = tf.shape(x)
    skip_shape = tf.shape(skip_features)
    height_diff = skip_shape[1] - x_shape[1]
    width_diff = skip_shape[2] - x_shape[2]
    
    if height_diff > 0 or width_diff > 0:
        skip_features = tf.keras.layers.Cropping2D(cropping=((height_diff//2, height_diff-height_diff//2),
                                                             (width_diff//2, width_diff-width_diff//2)))(skip_features)
    
    x = tf.keras.layers.Concatenate()([x, skip_features])
    x = tf.keras.layers.Conv2D(num_filters, 3, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.Conv2D(num_filters, 3, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    return x

def unet_model(input_shape=(572, 572, 3), num_classes=1):
    inputs = tf.keras.layers.Input(input_shape)
    
    # Contracting Path
    p1, s1 = encoder_block(inputs, 64)
    p2, s2 = encoder_block(p1, 128)
    p3, s3 = encoder_block(p2, 256)
    p4, s4 = encoder_block(p3, 512)
    
    # Bottleneck
    b1 = tf.keras.layers.Conv2D(1024, 3, padding='same')(p4)
    b1 = tf.keras.layers.BatchNormalization()(b1)
    b1 = tf.keras.layers.Activation('relu')(b1)
    b1 = tf.keras.layers.Conv2D(1024, 3, padding='same')(b1)
    b1 = tf.keras.layers.BatchNormalization()(b1)
    b1 = tf.keras.layers.Activation('relu')(b1)
    
    # Expansive Path
    d1 = decoder_block(b1, s4, 512)
    d2 = decoder_block(d1, s3, 256)
    d3 = decoder_block(d2, s2, 128)
    d4 = decoder_block(d3, s1, 64)
    
    # Output
    outputs = tf.keras.layers.Conv2D(num_classes, 1, padding='same', activation='sigmoid')(d4)
    
    model = tf.keras.models.Model(inputs, outputs, name='U-Net')
    return model

if __name__ == '__main__':
    model = unet_model(input_shape=(572, 572, 3), num_classes=2)
    model.summary()

In [10]:
import numpy as np 
from PIL import Image 
from tensorflow.keras.preprocessing import image 

# Load the image 
img = Image.open('./Cat from U-Net.png') 
# Preprocess the image 
img = img.resize((572, 572)) 
img_array = image.img_to_array(img) 
img_array = np.expand_dims(img_array[:,:,:3], axis=0) 
img_array = img_array / 255.

# Load the model 
model = unet_model(input_shape=(572, 572, 3), num_classes=2) 

# Make predictions 
predictions = model.predict(img_array) 

# Convert predictions to a numpy array and resize to original image size 
predictions = np.squeeze(predictions, axis=0) 
predictions = np.argmax(predictions, axis=-1) 
predictions = Image.fromarray(np.uint8(predictions*255)) 
predictions = predictions.resize((img.width, img.height)) 

# Save the predicted image 
predictions.save('predicted_image.jpg') 
predictions


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```
