In [4]:
import os
import pandas as pd
from pymatgen.core import Composition
import csv

In [5]:
# Element list
X_elements = ["Li", "Na", "K"]
B_elements = ["O", "S", "Se", "Te", "OH"]
A_elements = {"F", "Cl", "Br", "I", "BH4", "BF4", "BCl4", "AlH4", "AlF4", "AlCl4", "NH2", "NO2", "ClO4", "IO4", "ICl4", "CN"}

In [14]:
# Chemical formula of pure structures
combinations = [["formula", "X", "B", "A", "A_w", "A_d", "A_dw"]] # w:weight; d:doping
for X in X_elements:
    for B in B_elements:
        for A in A_elements:
            X_valence = 1
            B_valence = -2
            A_valence = -1
            if B == 'OH':
                B_valence = -1
            X_ratio = (abs(A_valence) + abs(B_valence)) // abs(X_valence)
            formula = f"{X}{X_ratio}{B}{A}"
            combinations.append([formula, X, B, A, 1])

            # Chemical formula of doping structures
            for A_mixed in A_elements:
                if A != A_mixed and A < A_mixed:  # 添加字母表顺序检查，确保 A 排在 A_mixed 前面
                    for ratio in [0.125, 0.25, 0.375, 0.50, 0.625, 0.75, 0.875]:
                        if len(A) > 2 and len(A_mixed) > 2:
                            formula_mixed = f"{X}{X_ratio}{B}({A}){1-ratio}({A_mixed}){ratio}"
                        elif A == 'CN':
                            if len(A_mixed) > 2:
                                formula_mixed = f"{X}{X_ratio}{B}({A}){1-ratio}({A_mixed}){ratio}"
                            else:
                                formula_mixed = f"{X}{X_ratio}{B}({A}){1-ratio}{A_mixed}{ratio}"
                        elif A_mixed == 'CN':
                            if len(A) > 2:
                                formula_mixed = f"{X}{X_ratio}{B}({A}){1-ratio}({A_mixed}){ratio}"
                            else:
                                formula_mixed = f"{X}{X_ratio}{B}{A}{1-ratio}({A_mixed}){ratio}"
                        elif len(A) > 2:
                            formula_mixed = f"{X}{X_ratio}{B}({A}){1-ratio}{A_mixed}{ratio}"
                        elif len(A_mixed) > 2:
                            formula_mixed = f"{X}{X_ratio}{B}{A}{1-ratio}({A_mixed}){ratio}"
                        else:
                            formula_mixed = f"{X}{X_ratio}{B}{A}{1-ratio}{A_mixed}{ratio}"
                        combinations.append([formula_mixed, X, B, A, 1 - ratio, A_mixed, ratio])

In [15]:
df = pd.DataFrame(combinations[1:], columns=combinations[0])
# Extract composition from formula
df['composition'] = df['formula'].apply(lambda x: Composition(x).formula)
df

Unnamed: 0,formula,X,B,A,A_w,A_d,A_dw,composition
0,Li3OI,Li,O,I,1.000,,,Li3 I1 O1
1,Li3OI0.875(NO2)0.125,Li,O,I,0.875,NO2,0.125,Li3 I0.875 N0.125 O1.25
2,Li3OI0.75(NO2)0.25,Li,O,I,0.750,NO2,0.250,Li3 I0.75 N0.25 O1.5
3,Li3OI0.625(NO2)0.375,Li,O,I,0.625,NO2,0.375,Li3 I0.625 N0.375 O1.75
4,Li3OI0.5(NO2)0.5,Li,O,I,0.500,NO2,0.500,Li3 I0.5 N0.5 O2
...,...,...,...,...,...,...,...,...
12835,K2OH(NH2)0.625(NO2)0.375,K,OH,NH2,0.625,NO2,0.375,K2 H2.25 N1 O1.75
12836,K2OH(NH2)0.5(NO2)0.5,K,OH,NH2,0.500,NO2,0.500,K2 H2 N1 O2
12837,K2OH(NH2)0.375(NO2)0.625,K,OH,NH2,0.375,NO2,0.625,K2 H1.75 N1 O2.25
12838,K2OH(NH2)0.25(NO2)0.75,K,OH,NH2,0.250,NO2,0.750,K2 H1.5 N1 O2.5


In [16]:
# save to csv
folder_name = '../Data'
if not os.path.exists(folder_name):
    os.makedirs(folder_name)
df.to_csv(f'{folder_name}/1_data_with_raw_APs.csv',index=False)