In [42]:
import pandas as pd

# Load the raw table from 'HEA.csv' into `data`; the cells below read and reassign this name.
data = pd.read_csv(filepath_or_buffer='HEA.csv')

In [43]:
def _generate_BMGs_string(s):
        """
        Generate a BMGs string based on the vector 's' and the columns of the Excel file.
        
        Returns:
            str: BMGs string
        """
        
        columns = ['Ag','Al','Au','B','Be','C','Ca','Ce','Co','Cr','Cu','Dy','Er','Fe','Ga','Gd','Ge','Hf','La','Li','Lu','Mg','Mn','Mo','Nb','Nd','Ni','Pd','Pr','Rh','Ru','Sc','Si','Sm','Sn','Sr','Ta','Tb','Ti','Tm','V','W','Y','Yb','Zn','Zr']
        assert len(s) == len(columns), "The length of the vector 's' must match the number of columns in the Excel file."

        # Create a dictionary from relevant columns and vector 's'
        col_s_dict = {col: s[i] for i, col in enumerate(columns)}
        
        # Sort columns by the value in 's' and create the BMGs string
        sorted_cols = sorted(col_s_dict, key=lambda c: col_s_dict[c], reverse=True)
        bmg_string = ''.join([f"{col}{round(col_s_dict[col], 2)}" for col in sorted_cols if col_s_dict[col] != 0])
        
        return bmg_string

In [44]:
# Optional filter (currently disabled): keep only rows whose
# 'AAAAAAProcessing Condition' column equals 'AC'.
# data = data[data['AAAAAAProcessing Condition'] == 'AC']
chem_columns = ['Ag','Al','Au','B','Be','C','Ca','Ce','Co','Cr','Cu','Dy','Er','Fe','Ga','Gd','Ge','Hf','La','Li','Lu','Mg','Mn','Mo','Nb','Nd','Ni','Pd','Pr','Rh','Ru','Sc','Si','Sm','Sn','Sr','Ta','Tb','Ti','Tm','V','W','Y','Yb','Zn','Zr']

# Fill missing element amounts with 0 BEFORE the composition strings are
# built, so that NaN never reaches _generate_BMGs_string, then scale by 100
# (presumably fractions -> percent; the check below expects rows to total 100).
# NOTE: this rescales `data` in place, so re-running this cell without
# reloading the CSV multiplies the values again.
data[chem_columns] = data[chem_columns].fillna(0) * 100

# Encode each row's element amounts as a BMGs string.
data['GFA'] = data[chem_columns].apply(_generate_BMGs_string, axis=1)

# Sanity check: report every row (by position) whose amounts do not total
# 100 within a tolerance of 0.1.
data['sum'] = data[chem_columns].sum(axis=1)
for i, total in enumerate(data['sum']):
    if abs(total - 100) > 0.1:
        print(f'{i}行sum不为100,值为{total}')

# Drop the helper 'sum' column together with the columns that are not
# wanted in the exported table.
data = data.drop(columns=['REF','AAA-System','sum','AAAAAAProcessing Condition','PHASE','Phase','H    ev/atom','Hf  kJ/mol','Hmix','Smix/R','Delta' , 'HV','compressive stregth'])

# Verify that every column except 'GFA' and 'AAAAlloys' holds only numbers
# or missing values; report each offending cell by row position and column.
for col in data.columns:
    if col in ('GFA', 'AAAAlloys'):
        continue
    for i, value in enumerate(data[col]):
        if pd.isnull(value):
            continue
        try:
            float(value)
        except (TypeError, ValueError):
            # Only conversion failures count as "not a number"; anything else
            # (e.g. KeyboardInterrupt) must propagate.
            print(f'{i}行{col}列不是数字')


In [40]:
# Discard rows in which 'Modulus (GPa)', 'yield(MPa)' and 'Ε(%)' are all
# missing (a row is kept when at least one of them has a value), then write
# the remaining table to 'HEA_GFA.xlsx' without the index column.
data = data.loc[data[['Modulus (GPa)', 'yield(MPa)', 'Ε(%)']].notna().any(axis=1)]
data.to_excel(excel_writer='HEA_GFA.xlsx', index=False)