# 尝试反向建模生成制备参数

In [84]:
from time import time
from datetime import datetime
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import random, genfromtxt
from IPython.display import display
from matplotlib import rc
from matplotlib.pyplot import figure
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F
import matplotlib.ticker as mticker
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from torch.nn.utils.rnn import pad_sequence

In [85]:
current_directory = os.getcwd()
print(current_directory)
device=torch.device("cuda")
# 检查是否有可用的 GPU
if torch.cuda.is_available():
    device = torch.device("cuda")  # 使用 GPU
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")  # 使用 CPU
    print("No GPU available, using CPU instead.")

/home/redfu/work/Constitutive_Equation/MFNN/singlefluid
Using GPU: NVIDIA GeForce RTX 2080 Ti


In [86]:
# 设置随机种子
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# 读取 Excel 文件中的所有 sheet
url_hf = 'Data/DifferComponent_data.xlsm'
df_HF = pd.read_excel(url_hf, sheet_name=None)

# 初始化存储输入和标签的列表
inputs = []  # 存储每个 sheet 的 AngFreq 和 lossF
labels = []  # 存储每个 sheet 的 Mn1, Mn2, Mn3, Mn11, Mn22, Mn33
sequence_lengths = []  # 存储每个 sheet 的长度

# 遍历所有 sheet
for sheet_name, df in df_HF.items():
    # 移除包含 NaN 的行
    df = df.dropna()
    if sheet_name == 's':
        continue
    # 提取 AngFreq 和 lossF 作为输入特征
    ang_freq = df['AngFreq'].values.astype(np.float32)
    loss_f = df['lossF'].values.astype(np.float32)
    
    # 对 AngFreq 进行对数化处理
    ang_freq_log = np.log10(ang_freq)  # 使用 log10，也可以使用自然对数 np.log
    
    # 合并为 (N, 2) 的数组
    input_feature = np.column_stack((ang_freq_log, loss_f))
    
    # 提取 Mn1, Mn2, Mn3, Mn11, Mn22, Mn33 作为标签（只取第一行）
    label = df[['Mn1', 'Mn2', 'Mn3', 'Mn11', 'Mn22', 'Mn33']].iloc[0].values.astype(np.float32)
    
    # 保存输入和标签
    inputs.append(input_feature)
    labels.append(label)
    sequence_lengths.append(len(input_feature))  # 记录序列长度

# 对输入特征进行归一化
input_scaler = MinMaxScaler()
inputs_normalized = [input_scaler.fit_transform(x) for x in inputs]  # 对每个序列单独归一化

# 对标签进行归一化
label_scaler = MinMaxScaler()
labels_normalized = label_scaler.fit_transform(np.array(labels))  # 对所有标签一起归一化

# 将输入和标签转换为 PyTorch 张量
inputs_tensor = [torch.tensor(x, dtype=torch.float32) for x in inputs_normalized]
labels_tensor = torch.tensor(labels_normalized, dtype=torch.float32)

# 对变长序列进行填充
inputs_padded = pad_sequence(inputs_tensor, batch_first=True)  # 填充为相同长度

# 记录每个序列的实际长度
sequence_lengths = torch.tensor(sequence_lengths, dtype=torch.long)

# 打印填充后的输入和标签形状
print("Padded inputs shape:", inputs_padded.shape)  # (num_sheets, max_length, 2)
print("Labels shape:", labels_tensor.shape)  # (num_sheets, 6)
print("Sequence lengths:", sequence_lengths)

Padded inputs shape: torch.Size([6, 61, 2])
Labels shape: torch.Size([6, 6])
Sequence lengths: tensor([36, 35, 61, 36, 41, 42])


In [87]:
from torch.utils.data import DataLoader, TensorDataset

# 创建数据集
dataset = TensorDataset(inputs_padded, labels_tensor, sequence_lengths)

# 创建 DataLoader
batch_size = 1  # 根据需求调整
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [88]:
class VAE(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim, output_dim):
        super(VAE, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.output_dim = output_dim
        
        # 编码器
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=input_dim, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Flatten()
        )
        
        # 潜在空间的均值和方差
        # 这里暂时不定义 fc_mu 和 fc_logvar，因为在 forward 中动态计算
        self.fc_mu = None
        self.fc_logvar = None
        
        # 解码器
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # 动态计算 max_length
        batch_size, _, max_length = x.shape
        
        # 编码
        h = self.encoder(x.permute(0, 2, 1))  # 转换为 (batch_size, 2, sequence_length)
        
        # 动态定义 fc_mu 和 fc_logvar
        if self.fc_mu is None:
            self.fc_mu = nn.Linear(h.shape[1], self.latent_dim).to(x.device)
            self.fc_logvar = nn.Linear(h.shape[1], self.latent_dim).to(x.device)
        
        mu, logvar = self.fc_mu(h), self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        
        # 解码
        return self.decoder(z), mu, logvar

In [89]:
# 初始化模型
input_dim = 2  # AngFreq 和 lossF
hidden_dim = 4
latent_dim = 10
output_dim = 6  # Mn1, Mn2, Mn3, Mn11, Mn22, Mn33
model = VAE(input_dim, hidden_dim, latent_dim, output_dim)

# 定义优化器
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# 定义损失函数
def loss_function(recon_x, x, mu, logvar):
    # 重构损失
    BCE = nn.functional.mse_loss(recon_x, x, reduction='sum')
    # KL散度
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

# 训练
for epoch in range(1000):  # 假设训练 10 个 epoch
    for batch in dataloader:
        inputs, labels, lengths = batch
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(inputs)
        loss = loss_function(recon_batch, labels, mu, logvar)
        loss.backward()
        optimizer.step()
    if epoch%100==0:
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

Epoch 1, Loss: 2.4003


Epoch 101, Loss: 0.9940
Epoch 201, Loss: 0.6837
Epoch 301, Loss: 2.0237
Epoch 401, Loss: 0.3791
Epoch 501, Loss: 0.5744
Epoch 601, Loss: 0.5755
Epoch 701, Loss: 0.5737
Epoch 801, Loss: 0.8557
Epoch 901, Loss: 1.8121


In [94]:
# 训练结束后，打印模型对 inputs 的输出并反归一化
with torch.no_grad():  # 禁用梯度计算
    model.eval()  # 将模型设置为评估模式
    recon_batch, mu, logvar = model(inputs)  # 获取模型输出
    
    # 将模型输出反归一化
    recon_batch_np = recon_batch.detach().numpy()  # 转换为 NumPy 数组
    recon_batch_original = label_scaler.inverse_transform(recon_batch_np)  # 反归一化
    
    print("Model output for inputs (after training):")
    print(recon_batch_original)  # 打印反归一化后的模型输出

Model output for inputs (after training):
[[3.6120186e+01 9.8328957e+01 3.7598652e+01 2.1636653e-01 3.1900376e-01
  6.6812620e-02]]
