In [7]:
#自编码器（Autoencoder）是一种非常适合用于异常值检测的神经网络结构，特别是在无法获得大量带标签异常样本的情况下。它们通常被训练为只重建正常数据的分布，所以当输入的是异常数据时，重建误差会较大，从而可以用来检测异常。
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import numpy as np

In [30]:
def create_autoencoder(input_shape):
    # 编码器
    encoder = models.Sequential([
        layers.InputLayer(input_shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2), padding='same'), # 将得到 65x65
        layers.Conv2D(16, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2), padding='same'), # 将得到 33x33
    ])
    
    # 解码器
    decoder = models.Sequential([
        layers.Conv2DTranspose(16, (3, 3), strides=2, activation='relu', padding='same'), # 应该会扩展到 66x66
        layers.Cropping2D(cropping=((1, 0), (1, 0))), # 裁剪到 65x65
        layers.Conv2DTranspose(32, (3, 3), strides=2, activation='relu', padding='same'), # 应该会扩展到 130x130
        layers.Cropping2D(cropping=((1, 0), (1, 0))), # 裁剪到 129x129
        layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')
    ])
    
    autoencoder = models.Sequential([encoder, decoder])
    return autoencoder

In [31]:
autoencoder = create_autoencoder((129, 129, 1))

In [15]:
#图像预处理归一化
def preprocess_image(image):
    # 计算每张图像的最小值和最大值
    min_value = tf.reduce_min(image)
    max_value = tf.reduce_max(image)

    # 线性缩放图像数据到[-1, 1]
    image_normalized = (image - min_value) / (max_value - min_value) * 2.0 - 1.0
    
    # 确保图像数据类型为float32
    image_normalized = tf.cast(image_normalized, tf.float32)
    
    # 增加一个通道维度（变成形状 [129, 129, 1]）
    image_normalized = tf.expand_dims(image_normalized, axis=-1)
    
    return image_normalized

In [9]:
train_data = np.load('D:/科大研究生生活/文章/2023.10后补充工作/数据/按炮划分数据/第三次筛选/val_output.npy')

In [20]:
train_data = tf.convert_to_tensor(train_data, dtype=tf.float32)

In [21]:
# 创建 TensorFlow 数据集对象
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_data))

# 应用预处理函数
train_dataset = train_dataset.map(lambda x, y: (preprocess_image(x), preprocess_image(y)))  #逐张图像计算最大/最小值

In [22]:
# 设置批量大小
batch_size = 32

# 批处理和预取
train_dataset = train_dataset.batch(batch_size).repeat()

In [32]:
# 调用你之前定义的模型创建函数
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

In [33]:
# 开始训练
history = autoencoder.fit(train_dataset, epochs=50, steps_per_epoch=len(train_data) // batch_size)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [86]:
autoencoder.save('D:/科大研究生生活/课题/异常值检测/模型/autoencoder1.h5')

In [64]:
test_data1 = np.load('D:/科大研究生生活/文章/2023.10后补充工作/数据/按炮划分数据/第三次筛选/test_output.npy')

In [60]:
test_data1 = tf.convert_to_tensor(test_data1,dtype=tf.float32)
test_set1 = tf.data.Dataset.from_tensor_slices(test_data1)
test_set1 = test_set1.map(lambda x: (preprocess_image(x)))

In [61]:
test_set1 = test_set1.batch(batch_size)


In [62]:
reconstructions = autoencoder.predict(test_set1)



In [63]:
reconstructions = reconstructions.reshape(-1,129,129)

In [65]:
reconstruction_errors = tf.keras.losses.mse(reconstructions.reshape(-1, 129 * 129), 
                                            test_data1.reshape(-1, 129 * 129))

In [85]:
reconstruction_errors.numpy()

array([0.37772849, 0.35040021, 0.33243734, ..., 0.10774501, 0.10686203,
       0.10507507])

In [78]:
# 设定异常检测阈值
error_threshold = np.percentile(reconstruction_errors.numpy(), 99)  # 假设异常值占5%

In [79]:
# 检测异常
outliers = reconstruction_errors > error_threshold

In [80]:
outliers.numpy()

array([False, False, False, ..., False, False, False])

In [70]:
import matplotlib.pyplot as plt

In [87]:
shot = np.load('D:/科大研究生生活/文章/2023.10后补充工作/数据/夏翔泽下载数据/output_gkfiles66011_67142/66011/PSIRZ.npy')

In [88]:
shot1 = tf.convert_to_tensor(shot,dtype=tf.float32)
shot11 = tf.data.Dataset.from_tensor_slices(shot1)
shot11 = shot11.map(lambda x: (preprocess_image(x)))
shot11 = shot11.batch(batch_size)

In [89]:
reconstructions1 = autoencoder.predict(shot11)



In [90]:
reconstruction_errors1 = tf.keras.losses.mse(reconstructions1.reshape(-1, 129 * 129), 
                                            shot.reshape(-1, 129 * 129))

In [91]:
reconstruction_errors1.numpy()

array([0.76738561, 0.56162232, 0.70253203, 0.51729373, 0.47522321,
       0.39585598, 0.34694294, 0.32832788, 0.31118652, 0.2787368 ,
       0.25999757, 0.24267284, 0.22269321, 0.20293254, 0.19012982,
       0.18183247, 0.17654315, 0.17080666, 0.1665458 , 0.16424232,
       0.16014433, 0.15823183, 0.15643035, 0.15381927, 0.15065232,
       0.14772943, 0.14488151, 0.14160973, 0.13838509, 0.13421854,
       0.13156297, 0.12803568, 0.12452771, 0.12126829, 0.11769959,
       0.11446555, 0.11142852, 0.1082082 , 0.10528208, 0.10247849,
       0.09943938, 0.0965258 , 0.09389726, 0.09127155, 0.08823413,
       0.08571482, 0.08275791, 0.07981045, 0.07704219, 0.07463418,
       0.07003785, 0.06592981, 0.06201367, 0.05847181, 0.05533423,
       0.05192824, 0.04889249, 0.0458045 , 0.04278749, 0.03993524,
       0.03668927, 0.03401839, 0.03139169, 0.02875711, 0.0264774 ,
       0.02396065, 0.02163515, 0.01964228, 0.01753733, 0.01591739,
       0.01444939, 0.01271006, 0.01136175, 0.01014233, 0.00925