In [1]:
#提取HOMO、LUMO并计算gap，提取结果和保存路径在脚本末尾填写
import pandas as pd
import os

In [2]:
def _parse_frontier_energies(lines):
    """Return ``(HOMO, LUMO)`` read from the text lines of one log file.

    The last line containing ``'Alpha  occ. eigenvalues'`` supplies HOMO (its
    final number).  LUMO is the first number on the line directly after it, but
    only when that line contains ``'Alpha virt. eigenvalues'``.  Either element
    is ``None`` when it cannot be located.
    """
    occ_marker = 'Alpha  occ. eigenvalues'
    virt_marker = 'Alpha virt. eigenvalues'

    # Walk backwards so the first hit is the last occupied-eigenvalue line in the file.
    for occ_index in range(len(lines) - 1, -1, -1):
        if occ_marker in lines[occ_index]:
            break
    else:
        return None, None  # marker never appears (also covers an empty file)

    # The first four whitespace-separated tokens are the label
    # ('Alpha', 'occ.'/'virt.', 'eigenvalues', '--'); numbers start at index 4.
    occ_values = lines[occ_index].split()[4:]
    homo = float(occ_values[-1]) if occ_values else None

    lumo = None
    next_index = occ_index + 1
    if next_index < len(lines) and virt_marker in lines[next_index]:
        virt_values = lines[next_index].split()[4:]
        if virt_values:
            lumo = float(virt_values[0])

    return homo, lumo


def get_homolumo(homolumo_path):
    """Collect HOMO, LUMO and gap values from every ``.log`` file in a directory.

    Each regular file directly inside ``homolumo_path`` whose name ends with
    ``.log`` is read and parsed with ``_parse_frontier_energies``;
    ``Gap`` is ``LUMO - HOMO``.

    Parameters
    ----------
    homolumo_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        Columns ``Filename`` (file name without its last extension), ``HOMO``,
        ``LUMO`` and ``Gap``; one row per ``.log`` file, in sorted file-name
        order.  Values that could not be extracted are missing.  When no
        ``.log`` file is found the frame is empty but still has all four
        columns, so downstream column lookups keep working.

    Raises
    ------
    OSError
        If the directory or a log file cannot be read.
    ValueError
        If the selected eigenvalue token is not a valid float.
    """
    columns = ['Filename', 'HOMO', 'LUMO', 'Gap']
    homo_lumo_data = []

    # os.listdir gives entries in arbitrary order; sorting keeps the row order
    # reproducible across machines and runs.
    for name in sorted(os.listdir(homolumo_path)):
        file_path = os.path.join(homolumo_path, name)
        if not (os.path.isfile(file_path) and name.endswith('.log')):
            continue

        # errors='replace': a stray undecodable byte should not abort the whole
        # batch; the marker lines searched for are plain ASCII.
        with open(file_path, 'r', errors='replace') as file:
            lines = file.readlines()

        homo, lumo = _parse_frontier_energies(lines)
        gap = lumo - homo if homo is not None and lumo is not None else None

        homo_lumo_data.append({
            'Filename': name.rsplit('.', 1)[0],  # strip only the final extension
            'HOMO': homo,
            'LUMO': lumo,
            'Gap': gap
        })

    # Explicit columns guarantee the schema even when the list is empty.
    homo_lumo_data_df = pd.DataFrame(homo_lumo_data, columns=columns)
    return homo_lumo_data_df

In [3]:
# Run the extraction; the argument is the directory that holds the .log files.
# homo_lumo_data_df = get_homolumo(r'C:\Users\xiaoyu\Desktop\click\4-raman_info\Extract_info_from_6-31gd-opt-no-raman\logs')
homo_lumo_data_df_extended = get_homolumo('data/gaussian-logs/')

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
# Scale the numeric columns with StandardScaler. 'Filename' is a label, not a
# feature, so it is dropped first; the original row index and column names are
# carried over so the label can be joined back by index later.
homo_lumo_data_df_without_fn = homo_lumo_data_df_extended.drop(columns=['Filename'])
scaler1 = StandardScaler()
homo_lumo_data_df_without_fn_scaled = pd.DataFrame(
    data=scaler1.fit_transform(homo_lumo_data_df_without_fn),
    index=homo_lumo_data_df_without_fn.index,
    columns=homo_lumo_data_df_without_fn.columns,
)
homo_lumo_data_df_without_fn_scaled

Unnamed: 0,HOMO,LUMO,Gap
0,0.223603,-0.664105,-0.542035
1,0.135074,0.533516,0.293937
2,0.101724,0.508907,0.292574
3,0.795405,-0.490614,-0.683818
4,0.646239,-0.574284,-0.672366
5,0.575901,-0.624321,-0.674002
6,1.355685,0.011402,-0.60202
7,0.640782,-0.721936,-0.76807
8,0.466755,-0.822421,-0.756618
9,0.566199,0.03396,-0.232022


In [7]:
# homo_lumo_data_df_scaled = homo_lumo_data_df_without_fn_scaled.join(homo_lumo_data_df['Filename'])
# Re-attach the 'Filename' column to the scaled values; join aligns on the row index.
homo_lumo_data_df_scaled = homo_lumo_data_df_without_fn_scaled.join(homo_lumo_data_df_extended['Filename'])
homo_lumo_data_df_scaled

Unnamed: 0,HOMO,LUMO,Gap,Filename
0,0.223603,-0.664105,-0.542035,E1
1,0.135074,0.533516,0.293937,E10
2,0.101724,0.508907,0.292574,E11
3,0.795405,-0.490614,-0.683818,E12
4,0.646239,-0.574284,-0.672366,E13
5,0.575901,-0.624321,-0.674002,E14
6,1.355685,0.011402,-0.60202,E15
7,0.640782,-0.721936,-0.76807,E16
8,0.466755,-0.822421,-0.756618,E17
9,0.566199,0.03396,-0.232022,E18


In [8]:
# homo_lumo_data_df_scaled.to_csv(r'C:\Users\xiaoyu\Desktop\click\4-raman_info\Extract_info_from_6-31gd-opt-no-raman\Homo_Lumo_scaled.csv', index=False, encoding='utf-8')
# Save the scaled table (with file names) as UTF-8 CSV, without the index column.
# NOTE(review): to_csv does not create './extended_data' — presumably it already exists; confirm.
homo_lumo_data_df_scaled.to_csv('./extended_data/4.Homo_Lumo_scaled.csv', index=False, encoding='utf-8')

In [9]:
# Reload the scaled table from disk to check the CSV round trip.
# The path has to be the file written by the to_csv cell
# ('./extended_data/4.Homo_Lumo_scaled.csv'); the former value lacked the '4.'
# prefix, so a fresh run would either fail or load a stale file.
path = './extended_data/4.Homo_Lumo_scaled.csv'

homo_lumo_df = pd.read_csv(path)

In [10]:
# Display the table that was read back from the CSV file.
homo_lumo_df

Unnamed: 0,HOMO,LUMO,Gap,Filename
0,0.223603,-0.664105,-0.542035,E1
1,0.135074,0.533516,0.293937,E10
2,0.101724,0.508907,0.292574,E11
3,0.795405,-0.490614,-0.683818,E12
4,0.646239,-0.574284,-0.672366,E13
5,0.575901,-0.624321,-0.674002,E14
6,1.355685,0.011402,-0.60202,E15
7,0.640782,-0.721936,-0.76807,E16
8,0.466755,-0.822421,-0.756618,E17
9,0.566199,0.03396,-0.232022,E18
