In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split

In [None]:
# Root folder that will hold the re-organised dataset.
base_dir = 'dogvscats/train'
# Derive the train / validation split folders from the root, then make sure
# both exist on disk (no error if they are already there).
train_dir, validation_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'validation')
)
for split_dir in (train_dir, validation_dir):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
# One sub-folder per class ("cats" / "dogs") inside each split directory.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)
# Create the four class folders; existing folders are left untouched.
for class_dir in (train_cats_dir, train_dogs_dir,
                  validation_cats_dir, validation_dogs_dir):
    os.makedirs(class_dir, exist_ok=True)

In [None]:
# Folder holding the original, unsorted training images.
original_train_dir = './train'
# os.listdir returns entries in arbitrary, filesystem-dependent order. Sort the
# listing once so the seeded split below picks the same files on every machine,
# and reuse it for both classes instead of scanning the directory twice.
all_files = sorted(os.listdir(original_train_dir))
# The class is inferred from a substring of the file name ('cat' / 'dog').
all_cats = [os.path.join(original_train_dir, f) for f in all_files if 'cat' in f]
all_dogs = [os.path.join(original_train_dir, f) for f in all_files if 'dog' in f]
print(len(all_cats))
print(len(all_dogs))
# Preview a few paths only; printing the whole list floods the cell output.
print(all_cats[:5])
# Per-class 80/20 train/validation split with a fixed seed.
train_cats, val_cats = train_test_split(all_cats, test_size=0.2, random_state=42)
train_dogs, val_dogs = train_test_split(all_dogs, test_size=0.2, random_state=42)

In [None]:
# Copy every image into the class folder of its split. Each entry pairs a list
# of source paths with the destination folder; order matches the split above.
copy_plan = (
    (train_cats, train_cats_dir),
    (val_cats, validation_cats_dir),
    (train_dogs, train_dogs_dir),
    (val_dogs, validation_dogs_dir),
)
for image_paths, destination_dir in copy_plan:
    for file in image_paths:
        shutil.copy(file, destination_dir)

In [None]:
# ImageDataGenerator is the Keras helper for on-the-fly image preprocessing and
# augmentation. Here the only preprocessing is multiplying each pixel value by
# 1/255; no augmentation is configured for either generator.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators:
#   target_size=(150, 150): every image is resized to 150x150 pixels.
#   batch_size=20:          each batch holds 20 images.
#   class_mode='binary':    binary labels, since there are two classes (cats, dogs).
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')

# Batches read from the training split folder.
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Batches read from the validation split folder.
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import os
# TensorBoard event files for the runs that use this callback.
log_dir = os.path.join("logs", "fit", "regularized_model")
# exist_ok=True replaces the exists()-then-create check: it is race-free and
# matches how the dataset folders are created earlier in the notebook.
os.makedirs(log_dir, exist_ok=True)
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
# Full-model checkpoint (not weights only); save_best_only=False means the file
# is rewritten on every save rather than only when the monitored metric improves.
checkpoint_path = "regularized_model_checkpoint.keras"
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_best_only=False, save_weights_only=False)

In [None]:
# Baseline CNN built with the Sequential API: four Conv2D(3x3, ReLU) ->
# MaxPooling2D(2x2) stages followed by a dense classifier head.
baseline_layers = [
    # Stage 1: 32 filters; input_shape declares 150x150 images with 3 channels (RGB).
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    # 2x2 max pooling shrinks the feature maps.
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Stage 2: 64 filters.
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Stage 3: 128 filters.
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Stage 4: 128 filters.
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Flatten the multi-dimensional feature maps into a single vector.
    tf.keras.layers.Flatten(),
    # Fully connected layer: 512 units, ReLU.
    tf.keras.layers.Dense(512, activation='relu'),

    # Output layer: a single sigmoid unit for the binary (cat vs. dog) label.
    tf.keras.layers.Dense(1, activation='sigmoid'),
]
baseline_model = tf.keras.models.Sequential(baseline_layers)

# Compile with the Adam optimizer, binary cross-entropy loss and accuracy metric.
baseline_model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training settings for the baseline run:
#   steps_per_epoch=10:  10 training batches per epoch.
#   epochs=10:           10 epochs in total.
#   validation_data:     batches come from validation_generator.
#   validation_steps=10: 10 validation batches per epoch.
# NOTE(review): the callbacks are the shared ones created earlier, so this
# baseline run logs to the "regularized_model" TensorBoard folder and writes
# the "regularized_model_checkpoint.keras" file -- confirm that is intended.
baseline_fit_options = dict(
    steps_per_epoch=10,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=10,
    callbacks=[tensorboard_callback, checkpoint_callback],
)
history_baseline = baseline_model.fit(train_generator, **baseline_fit_options)

In [None]:
# Training generator with data augmentation. Options:
#   rescale=1./255:         multiply every pixel value by 1/255.
#   rotation_range=40:      random rotations, range of 40 degrees.
#   width_shift_range=0.2:  random horizontal shifts, up to 20% of the width.
#   height_shift_range=0.2: random vertical shifts, up to 20% of the height.
#   shear_range=0.2:        random shear transforms.
#   zoom_range=0.2:         random zoom.
#   horizontal_flip=True:   random horizontal flips.
#   fill_mode='nearest':    newly created pixels are filled with the "nearest" strategy.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen_augmented = ImageDataGenerator(**augmentation_options)

# Augmented batches are read from train_dir with the same iterator settings as
# the plain generators: 150x150 images, 20 images per batch, binary labels.
augmented_flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')
train_generator_augmented = train_datagen_augmented.flow_from_directory(
    train_dir, **augmented_flow_options
)

In [None]:
import mlflow
import mlflow.tensorflow
# Same convolutional stack as the baseline model, plus a Dropout(0.5) layer
# before the output: half of the dense activations are randomly dropped during
# training to reduce overfitting.
regularized_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

regularized_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Batch counts reported by len() of each generator. They are kept for
# reference; the fit call below uses fixed step counts instead.
train_steps = len(train_generator_augmented)
val_steps = len(validation_generator)
# Let MLflow record the Keras training run automatically.
mlflow.tensorflow.autolog()
# Train the regularized model inside an MLflow run (fixed experiment id).
with mlflow.start_run(run_name="tensorflow",experiment_id="593623705617405727", nested=True) as run:
    try:
        history_regularized = regularized_model.fit(
            train_generator_augmented,
            steps_per_epoch=50,
            epochs=40,
            validation_data=validation_generator,
            validation_steps=5,
            callbacks=[tensorboard_callback, checkpoint_callback]
        )
    except Exception as e:
        # The f-prefix was missing, so the literal text "{e}" was printed and
        # the actual error was lost.
        print(f"error is {e}")

In [None]:
import tensorflow as tf
import mlflow
import mlflow.tensorflow

# 定义一个函数来创建模型
def create_model(input_shape=(150, 150, 3), dropout_rate=0.5):
    """Build the uncompiled, dropout-regularized CNN used for cat/dog classification.

    The network is four Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64, 128 and 128 filters, followed by Flatten, Dense(512, ReLU),
    Dropout and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (150, 150, 3), the size the notebook's generators
            produce. It must be large enough to pass through the four
            conv/pool stages.
        dropout_rate: Rate passed to the Dropout layer. Defaults to 0.5.

    Returns:
        A tf.keras.models.Sequential model; the caller is responsible for
        compiling it.
    """
    with tf.name_scope("Model"):
        model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, name="Conv2D_1"),
            tf.keras.layers.MaxPooling2D((2, 2), name="MaxPooling2D_1"),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu', name="Conv2D_2"),
            tf.keras.layers.MaxPooling2D((2, 2), name="MaxPooling2D_2"),
            tf.keras.layers.Conv2D(128, (3, 3), activation='relu', name="Conv2D_3"),
            tf.keras.layers.MaxPooling2D((2, 2), name="MaxPooling2D_3"),
            tf.keras.layers.Conv2D(128, (3, 3), activation='relu', name="Conv2D_4"),
            tf.keras.layers.MaxPooling2D((2, 2), name="MaxPooling2D_4"),
            tf.keras.layers.Flatten(name="Flatten"),
            tf.keras.layers.Dense(512, activation='relu', name="Dense_1"),
            tf.keras.layers.Dropout(dropout_rate, name="Dropout"),
            tf.keras.layers.Dense(1, activation='sigmoid', name="Output")
        ])
        return model

# Create and compile the model.
with tf.name_scope("Model_Compilation"):
    regularized_model = create_model()
    regularized_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Batch counts reported by len() of each generator (kept for reference; the
# fit call below uses fixed step counts).
train_steps = len(train_generator_augmented)
val_steps = len(validation_generator)

# Enable MLflow autologging for the Keras training run.
mlflow.tensorflow.autolog()

# Train the model. The explicit MLflow run wrapper is currently disabled:
# with mlflow.start_run(run_name="tensorflow", experiment_id="593623705617405727", nested=True) as run:
try:
    with tf.name_scope("Model_Training"):
        history_regularized = regularized_model.fit(
            train_generator_augmented,
            steps_per_epoch=100,
            epochs=10,
            validation_data=validation_generator,
            validation_steps=25,
            callbacks=[tensorboard_callback, checkpoint_callback]
        )
except Exception as e:
    print(f"Error is {e}")

# Write the model architecture to the TensorBoard logs.
with tf.name_scope("TensorBoard"):
    log_dir = "logs"
    file_writer = tf.summary.create_file_writer(log_dir + "/model")
    # tf.get_default_graph() does not exist in TF2 (AttributeError), and in
    # eager mode there is no populated default graph anyway. Trace the model's
    # forward pass with tf.function and export that concrete graph instead.
    traced_forward = tf.function(lambda images: regularized_model(images, training=False))
    concrete_forward = traced_forward.get_concrete_function(
        tf.TensorSpec(shape=regularized_model.input_shape, dtype=tf.float32)
    )
    with file_writer.as_default():
        tf.summary.graph(concrete_forward.graph)

In [None]:
# Report how many batches len() gives for the augmented training generator and
# for the validation generator.
train_steps = len(train_generator_augmented)
val_steps = len(validation_generator)
step_counts = (train_steps, val_steps)
print(*step_counts)

In [None]:
import tensorflow as tf
import mlflow
import mlflow.tensorflow
from datetime import datetime

# 定义一个函数来创建模型
def create_model():
    """Return the uncompiled dropout-regularized CNN for 150x150 RGB images.

    Layers, in order: four named Conv2D(3x3, ReLU) / MaxPooling2D(2x2) pairs
    with 32, 64, 128 and 128 filters, then Flatten, Dense(512, ReLU),
    Dropout(0.5) and a single sigmoid output unit. Everything is created
    inside the "Model" name scope.
    """
    layers = tf.keras.layers
    with tf.name_scope("Model"):
        layer_stack = [
            # Convolutional feature extractor.
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3), name="Conv2D_1"),
            layers.MaxPooling2D((2, 2), name="MaxPooling2D_1"),
            layers.Conv2D(64, (3, 3), activation='relu', name="Conv2D_2"),
            layers.MaxPooling2D((2, 2), name="MaxPooling2D_2"),
            layers.Conv2D(128, (3, 3), activation='relu', name="Conv2D_3"),
            layers.MaxPooling2D((2, 2), name="MaxPooling2D_3"),
            layers.Conv2D(128, (3, 3), activation='relu', name="Conv2D_4"),
            layers.MaxPooling2D((2, 2), name="MaxPooling2D_4"),
            # Classifier head.
            layers.Flatten(name="Flatten"),
            layers.Dense(512, activation='relu', name="Dense_1"),
            layers.Dropout(0.5, name="Dropout"),
            layers.Dense(1, activation='sigmoid', name="Output"),
        ]
        model = tf.keras.models.Sequential(layer_stack)
    return model

# Build a fresh model and compile it (Adam, binary cross-entropy, accuracy).
regularized_model = create_model()
regularized_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Batch counts reported by len() of each generator (kept for reference; the
# fit call below uses fixed step counts).
train_steps = len(train_generator_augmented)
val_steps = len(validation_generator)

# TensorBoard log folder named after the current time, so each execution of
# this cell writes to its own folder under logs/fit/.
# NOTE(review): relies on `from datetime import datetime` in this cell; a later
# cell runs `import datetime`, which rebinds the name to the module.
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/fit/{run_stamp}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True)

# MLflow autologging and the explicit run wrapper are disabled for this run:
# mlflow.tensorflow.autolog()
# with mlflow.start_run(run_name="tensorflow", experiment_id="593623705617405727", nested=True) as run:

# 50 training batches per epoch, 20 epochs, 10 validation batches per epoch.
training_options = dict(
    steps_per_epoch=50,
    epochs=20,
    validation_data=validation_generator,
    validation_steps=10,
    callbacks=[tensorboard_callback, checkpoint_callback],
)
try:
    history_regularized = regularized_model.fit(train_generator_augmented, **training_options)
except Exception as e:
    print(f"Error is {e}")

# To inspect the logs, run from a shell:
# tensorboard --logdir logs/fit

In [None]:
import tensorflow as tf
from tensorflow import keras
import datetime

# Load the MNIST example dataset; the second split returned by load_data() is
# used as validation data here.
(x_train, y_train), (x_val, y_val) = keras.datasets.mnist.load_data()
# Flatten every image to a 784-element vector and divide the values by 255.
x_train, x_val = (images.reshape(-1, 784) / 255.0 for images in (x_train, x_val))

# Small fully connected classifier: 784 inputs -> 32 ReLU units -> 10-way softmax.
mnist_layers = [
    keras.layers.Dense(32, activation='relu', input_shape=(784,)),
    keras.layers.Dense(10, activation='softmax'),
]
model = keras.models.Sequential(mnist_layers)

# Integer class labels, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# TensorBoard callback writing to a folder named after the current time.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/fit/{run_stamp}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train for 5 epochs, validating on the held-out split after each one.
model.fit(x_train, y_train, epochs=5, validation_data=(x_val, y_val), callbacks=[tensorboard_callback])

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import datetime

# Load the MNIST example dataset (train split plus a split used for validation).
mnist_train, mnist_val = tf.keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_val, y_val = mnist_val
# Flatten each image to 784 values and divide by 255.
x_train = x_train.reshape(-1, 784) / 255.0
x_val = x_val.reshape(-1, 784) / 255.0

# Two-layer dense classifier: 32 ReLU units, then a 10-way softmax output.
hidden_layer = layers.Dense(32, activation='relu', input_shape=(784,))
output_layer = layers.Dense(10, activation='softmax')
model = models.Sequential([hidden_layer, output_layer])

# Adam optimizer, sparse categorical cross-entropy loss, accuracy metric.
compile_options = dict(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

# TensorBoard callback with a time-stamped log folder under logs/fit/.
log_dir = "".join(["logs/fit/", datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train for 5 epochs with validation on (x_val, y_val).
model.fit(x_train, y_train, epochs=5, validation_data=(x_val, y_val), callbacks=[tensorboard_callback])

# Start TensorBoard (run from a command line):
# tensorboard --logdir=logs/fit