In [2]:
import numpy as np
from pymatgen.core import Structure
from megnet.models import MEGNetModel
from megnet.data.crystal import CrystalGraph
import json

In [None]:
# 对DFT直接拟合

In [3]:
with open("/root/home/jupyter/DFT/bandgap/bandgap_json/DFT_bandgapTrain1.json", "r") as f:
    data_train = json.load(f)

In [4]:
structures = []
DFT_bandgaps = []

for item in data_train:
    structure = Structure.from_dict(item['structure'])
    gap = item['bandgap']
    structures.append(structure)
    DFT_bandgaps.append(gap)

In [5]:
#5是最低截断半径，用以判断哪两个原子之间有键连接

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# 设置参数列表
nfeat_bond_list = [10, 15, 20, 25, 30]
r_cutoff_list = [6, 7, 8, 9, 10]

# 初始化结果记录
results = []

# 循环训练模型
for nfeat_bond in nfeat_bond_list:
    for r_cutoff in r_cutoff_list:
        # 设置参数
        gaussian_centers = np.linspace(0, r_cutoff+1, nfeat_bond)
        gaussian_width = 0.5
        graph_converter = CrystalGraph(cutoff=r_cutoff)
        
        # 初始化模型
        model = MEGNetModel(graph_converter=graph_converter, centers=gaussian_centers, width=gaussian_width)
        
        # 训练模型
        model.train(structures,DFT_bandgaps, epochs=100)
        
        # 预测结果
        predictions = []
        for structure in structures:
            prediction = model.predict_structure(structure)
            predictions.append(prediction[0])
        
        # 计算评估指标
        mse = mean_squared_error(DFT_bandgaps, predictions)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(DFT_bandgaps, predictions)
        r2 = r2_score(DFT_bandgaps, predictions)
        
        # 记录结果
        results.append({
            'nfeat_bond': nfeat_bond,
            'r_cutoff': r_cutoff,
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'r2': r2
        })



2023-12-01 09:52:21.396633: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-01 09:52:21.426402: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# 打印结果
for result in results:
    print(f"nfeat_bond: {result['nfeat_bond']}, r_cutoff: {result['r_cutoff']}, MSE: {result['mse']}, RMSE: {result['rmse']}, MAE: {result['mae']}, R2: {result['r2']}")


In [9]:
def get_best_r2(results):
    # 找到R2最大的结果
    best_result = max(results, key=lambda result: result['r2'])
    return best_result

def get_best_rmse(results):
    # 计算每个结果的RMSE
    for result in results:
        result['rmse'] = np.sqrt(result['mse'])
    
    # 找到RMSE最小的结果
    best_result = min(results, key=lambda result: result['rmse'])
    return best_result


In [17]:
best_r2_result = get_best_r2(results)
print(f"Best R2: {best_r2_result['r2']}, nfeat_bond: {best_r2_result['nfeat_bond']}, r_cutoff: {best_r2_result['r_cutoff']}")

best_rmse_result = get_best_rmse(results)
print(f"Best RMSE: {best_rmse_result['rmse']}, nfeat_bond: {best_rmse_result['nfeat_bond']}, r_cutoff: {best_rmse_result['r_cutoff']}")


Best R2: 0.9270888299996547, nfeat_bond: 10, r_cutoff: 8
Best RMSE: 0.10564542482733762, nfeat_bond: 10, r_cutoff: 8


In [None]:
def get_best_mae(results):
    # 计算每个结果的MAE
    for result in results:
        result['mae'] = mean_absolute_error(gaps, predictions)
    
    # 找到MAE最小的结果
    best_result = min(results, key=lambda result: result['mae'])
    return best_result

In [18]:
best_mae_result = get_best_mae(results)
print(f"Best MAE: {best_mae_result['mae']}, nfeat_bond: {best_mae_result['nfeat_bond']}, r_cutoff: {best_mae_result['r_cutoff']}")


Best MAE: 0.07920198556232247, nfeat_bond: 10, r_cutoff: 6


In [19]:
!nvidia-smi

Sun Nov 26 22:41:46 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.12             Driver Version: 535.104.12   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  | 00000000:00:08.0 Off |                    0 |
| N/A   31C    P0              22W / 300W |      2MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [7]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# 设置参数列表
nfeat_bond_list2 = [10]
r_cutoff_list2 = [10,12]#越高，计算时间越长，更多的边，键长信息将被处理

# 初始化结果记录
results2 = []

# 循环训练模型
for nfeat_bond in nfeat_bond_list2:
    for r_cutoff in r_cutoff_list2:
        # 设置参数
        gaussian_centers = np.linspace(0, r_cutoff+1, nfeat_bond)
        gaussian_width = 0.5
        graph_converter = CrystalGraph(cutoff=r_cutoff)
        
        # 初始化模型
        model = MEGNetModel(graph_converter=graph_converter, centers=gaussian_centers, width=gaussian_width)
        
        # 训练模型
        model.train(structures, gaps, epochs=100)
        
        # 预测结果
        predictions = []
        for structure in structures:
            prediction = model.predict_structure(structure)
            predictions.append(prediction[0])
        
        # 计算评估指标
        mse = mean_squared_error(gaps, predictions)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(gaps, predictions)
        r2 = r2_score(gaps, predictions)
        
        # 记录结果
        results2.append({
            'nfeat_bond': nfeat_bond,
            'r_cutoff': r_cutoff,
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'r2': r2
        })

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [8]:
# 打印结果
for result in results2:
    print(f"nfeat_bond: {result['nfeat_bond']}, r_cutoff: {result['r_cutoff']}, MSE: {result['mse']}, RMSE: {result['rmse']}, MAE: {result['mae']}, R2: {result['r2']}")


nfeat_bond: 10, r_cutoff: 10, MSE: 0.020761438802568193, RMSE: 0.1440883021017605, MAE: 0.1092880264815893, R2: 0.8643717596519871
nfeat_bond: 10, r_cutoff: 12, MSE: 0.0441800792883566, RMSE: 0.210190578495699, MAE: 0.1498537179177882, R2: 0.7113848192653066
