In [1]:
# Pandas and numpy for data manipulation
import pandas as pd
import numpy as np
import random
import torch

# Single seed shared by every random number generator configured below.
seed = 42

# Seed Python's `random`, NumPy's legacy global RNG and PyTorch.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Seed the CUDA generator as well when a GPU is available.
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    
# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 60)

# Matplotlib visualization
import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib inline

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize

# Seaborn for visualization
import seaborn as sns

# Splitting data into training and testing
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# data

In [2]:
# Load the raw device table. The file is decoded as GBK and read in a single
# pass (low_memory=False) instead of in chunks.
# NOTE(review): a later output contains "Galliumnitrate聽hydrate"; "聽" is what a
# UTF-8 non-breaking space looks like when decoded as GBK — confirm the file's
# real encoding before relying on the decoded labels.
data = pd.read_csv('data.csv', encoding='gbk', low_memory=False)
# Show the loaded frame.
data

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_perovskite_ABC3_structure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,Perovskite_deposition_quenching_media_additives_compounds,Perovskite_deposition_solvent_annealing,HTL_stack_sequence,HTL_additives_compounds,HTL_deposition_procedure,Backcontact_stack_sequence,Backcontact_thickness_list,Backcontact_deposition_procedure,JV_default_Voc,JV_default_Jsc,JV_default_FF,JV_default_PCE,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing
0,nip,SLG | FTO,TiO2-c | TiO2-nw,Undoped,CBD | Hydrothermal,1,none,Cl,Spin-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,Li-TFSI; TPB,Spin-coating,Au,80.0,Evaporation,0.800,20.40,0.630,10.30,0,110-60.0
1,nip,SLG | FTO,TiO2-c | TiO2-nw,Er,CBD | Hydrothermal,1,none,Cl,Spin-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,Li-TFSI; TPB,Spin-coating,Au,80.0,Evaporation,0.830,21.30,0.640,11.50,0,110-60.0
2,nip,SLG | FTO,TiO2-c | TiO2-nw,Er; Yb,CBD | Hydrothermal,1,none,Cl,Spin-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,Li-TFSI; TPB,Spin-coating,Au,80.0,Evaporation,0.870,21.70,0.660,12.90,0,110-60.0
3,nip,SLG | FTO,TiO2-c,Undoped,Spin-coating,1,none,Cl,Spin-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,Li-TFSI; TPB,Spin-coating,Au,60.0,Evaporation,0.913,15.77,0.700,10.00,0,100.0-5.0
4,nip,SLG | FTO,TiO2-c | TiO2-mp,TiCl4,Spin-coating | Spin-coating,1,MA,Undoped,Spin-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,Li-TFSI; TPB,Spin-coating,Au,80.0,Evaporation,1.080,21.70,0.725,17.00,0,100.0-5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3520,nip,SLG | FTO,TiO2-c | TiO2-mp,Li-TFSI,Spray-pyrolys | Spin-coating,1,none,Az,Spray-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,FK209; Li-TFSI; TBP,Spin-coating,Au,100.0,Evaporation,0.925,18.10,0.650,11.10,1,100.0-60.0
3521,nip,SLG | FTO,TiO2-c | TiO2-mp,Li-TFSI,Spray-pyrolys | Spin-coating,1,none,Az,Spin-coating,1.000,Undoped,Undoped,0,Spiro-MeOTAD,FK209; Li-TFSI; TBP,Spin-coating,Au,100.0,Evaporation,0.909,19.05,0.690,13.00,1,100.0-60.0
3522,nip,SLG | FTO,TiO2-c | TiO2-mp,In,Spin-coating | Spin-coating,1,none,Undoped,Spin-coating,0.800,Chlorobenzene,Undoped,0,Spiro-MeOTAD,Li-TFSI; TBP,Spin-coating,Au,100.0,Evaporation,1.100,22.30,0.771,18.90,0,100.0-10.0
3523,nip,SLG | ITO,TiO2-c | TiO2-mp,In,Spin-coating | Spin-coating,1,none,Undoped,Spin-coating,0.800,Chlorobenzene,Undoped,0,Spiro-MeOTAD,Li-TFSI; TBP,Spin-coating,Au,100.0,Evaporation,1.090,23.00,0.750,18.80,0,100.0-10.0


In [3]:
from sklearn.preprocessing import LabelEncoder
import time

def LabelEncoder_ifm(feature_name,data,LabelEncoder_dic):
    '''
    Label-encode one column of ``data`` in place and record how to decode it.

        feature_name: Name of feature column to be converted
        data: Data that needs to be updated; ``data[feature_name]`` is
              overwritten with the integer codes
        LabelEncoder_dic: Dictionary that is updated in place with
              ``{integer code: original label}`` for every distinct label

    Returns None: both results are delivered by mutating the arguments.
    Because the column is overwritten, calling this twice on the same column
    encodes the codes themselves and the stored mapping becomes code -> code.
    '''
    encoder_fit = LabelEncoder().fit(data[feature_name])
    encoder_data = encoder_fit.transform(data[feature_name])

    # classes_[k] is the label that is encoded as k, so enumerating it yields
    # the complete code -> label mapping with plain-int keys, without
    # inverse-transforming every row. dict.update mutates the caller's
    # dictionary and returns None, so its result is intentionally not kept.
    LabelEncoder_dic.update(enumerate(encoder_fit.classes_))

    # Replace the original labels with their integer codes.
    data[feature_name] = encoder_data

In [4]:
# Columns to label-encode, in processing order. For each one a module-level
# dictionary named "<column>_dic" is created and filled with the
# code -> original-label mapping; later cells look these dictionaries up by name.
columns_to_encode = [
    "Cell_architecture",
    "Substrate_stack_sequence",
    "ETL_stack_sequence",
    "ETL_additives_compounds",
    "ETL_deposition_procedure",
    "Perovskite_composition_perovskite_ABC3_structure",
    "Perovskite_composition_none_stoichiometry_components_in_excess",
    "Perovskite_additives_compounds",
    "Perovskite_deposition_procedure",
    "Perovskite_deposition_solvents_IPA",
    "Perovskite_deposition_quenching_media",
    "Perovskite_deposition_quenching_media_additives_compounds",
    "Perovskite_deposition_thermal_annealing",
    "Perovskite_deposition_solvent_annealing",
    "HTL_stack_sequence",
    "HTL_additives_compounds",
    "HTL_deposition_procedure",
    "Backcontact_stack_sequence",
    "Backcontact_deposition_procedure",
]
for column_name in columns_to_encode:
    # Bind the (still empty) dictionary to its global name first, then let the
    # encoder fill it and overwrite the column in `data`.
    column_mapping = globals()[column_name + "_dic"] = {}
    LabelEncoder_ifm(column_name, data, column_mapping)

In [5]:
# Binary target: 1 when JV_default_PCE is greater than 17, otherwise 0.
data["pce"] = (data["JV_default_PCE"] > 17).astype("int64")

# Features are every remaining column once the JV measurements and the
# derived target have been removed; the target is kept separately.
excluded_columns = ['JV_default_PCE','JV_default_Voc','JV_default_Jsc','JV_default_FF',"pce"]
X = data.drop(columns=excluded_columns).copy()
y = data["pce"].copy()

In [6]:
# Feature subset used to train the model; the order here is the column order
# of the training matrix.
best_features = [
    'Cell_architecture',
    'Substrate_stack_sequence',
    'ETL_stack_sequence',
    'ETL_additives_compounds',
    'ETL_deposition_procedure',
    'Perovskite_composition_none_stoichiometry_components_in_excess',
    'Perovskite_additives_compounds',
    'Perovskite_deposition_procedure',
    'DMF_DMSO',
    'Perovskite_deposition_quenching_media',
    'HTL_stack_sequence',
    'HTL_additives_compounds',
    'Backcontact_stack_sequence',
    'Backcontact_thickness_list',
    'Perovskite_deposition_solvents_IPA',
    'Perovskite_deposition_thermal_annealing',
]

# Voting model

In [8]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
import xgboost
from sklearn.ensemble import VotingClassifier
from imblearn.pipeline import Pipeline
import optuna

# One seed for both base learners and both resamplers.
seed = 42

# NOTE(review): the hyperparameters and voting weights below look like the
# output of a tuning run — confirm their provenance.
rf = RandomForestClassifier(
    n_estimators=184,
    criterion='gini',
    max_depth=18,
    min_samples_split=2,
    min_samples_leaf=2,
    max_features='sqrt',
    bootstrap=False,
    n_jobs=-1,
    random_state=seed,
)
xgb = xgboost.XGBClassifier(
    eta=0.038816566174682765,
    max_depth=10,
    gamma=0.22333749634570235,
    min_child_weight=1,
    subsample=0.9480060645299058,
    colsample_bytree=0.5382666931937315,
    random_state=seed,
)

voting_names = ["Random Forest", "XGBoost"]
voting_classifiers = [rf, xgb]

# Soft voting: the ensemble combines the base learners' predicted class
# probabilities using the weights given here.
voting_clf = VotingClassifier(
    estimators=list(zip(voting_names, voting_classifiers)),
    voting='soft',
    weights=[0.47286297402402977, 0.5310360184667748],
)

# Restrict to the selected features, rebalance the classes (random
# under-sampling followed by SMOTE over-sampling) and fit on the result.
X = X[best_features]
rus = RandomUnderSampler(sampling_strategy=0.38, random_state=seed)
X_fit, y_fit = rus.fit_resample(X, y)
smote = SMOTE(sampling_strategy=0.93, random_state=seed)
X_fit, y_fit = smote.fit_resample(X_fit, y_fit)
voting_clf.fit(X_fit, y_fit)

# Experimental knowledge constraints

In [9]:
# Show the frame again: the categorical columns now hold integer codes and the
# binary `pce` target column has been added.
data

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_perovskite_ABC3_structure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,Perovskite_deposition_quenching_media_additives_compounds,Perovskite_deposition_solvent_annealing,HTL_stack_sequence,HTL_additives_compounds,HTL_deposition_procedure,Backcontact_stack_sequence,Backcontact_thickness_list,Backcontact_deposition_procedure,JV_default_Voc,JV_default_Jsc,JV_default_FF,JV_default_PCE,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing,pce
0,0,0,16,116,3,1,12,83,11,1.000,8,8,0,19,83,5,3,80.0,1,0.800,20.40,0.630,10.30,0,75,0
1,0,0,16,8,3,1,12,83,11,1.000,8,8,0,19,83,5,3,80.0,1,0.830,21.30,0.640,11.50,0,75,0
2,0,0,16,9,3,1,12,83,11,1.000,8,8,0,19,83,5,3,80.0,1,0.870,21.70,0.660,12.90,0,75,0
3,0,0,12,116,10,1,12,83,11,1.000,8,8,0,19,83,5,3,60.0,1,0.913,15.77,0.700,10.00,0,30,0
4,0,0,15,31,14,1,2,322,11,1.000,8,8,0,19,83,5,3,80.0,1,1.080,21.70,0.725,17.00,0,30,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3520,0,0,15,20,17,1,12,38,18,1.000,8,8,0,19,58,5,3,100.0,1,0.925,18.10,0.650,11.10,1,34,0
3521,0,0,15,20,17,1,12,38,11,1.000,8,8,0,19,58,5,3,100.0,1,0.909,19.05,0.690,13.00,1,34,0
3522,0,0,15,70,14,1,12,322,11,0.800,2,8,0,19,80,5,3,100.0,1,1.100,22.30,0.771,18.90,0,8,1
3523,0,1,15,70,14,1,12,322,11,0.800,2,8,0,19,80,5,3,100.0,1,1.090,23.00,0.750,18.80,0,8,1


In [10]:
# Code -> label mapping for Cell_architecture; the sections below filter on
# these codes.
Cell_architecture_dic

{0: 'nip', 1: 'pin'}

## ETL

### nip

In [11]:
# nip devices (Cell_architecture == 0): list every combination of
# (ETL_stack_sequence, ETL_additives_compounds, ETL_deposition_procedure)
# that occurs in the data. Values are visited in order of first appearance at
# each nesting level, so rows are grouped by stack, then by additive.
data_nip = data[data["Cell_architecture"] == 0]
etl, etl_add, etl_pro = [], [], []
for stack in data_nip['ETL_stack_sequence'].unique():
    stack_rows = data_nip[data_nip['ETL_stack_sequence'] == stack]
    for additive in stack_rows['ETL_additives_compounds'].unique():
        additive_rows = stack_rows[stack_rows['ETL_additives_compounds'] == additive]
        for procedure in additive_rows['ETL_deposition_procedure'].unique():
            etl.append(stack)
            etl_add.append(additive)
            etl_pro.append(procedure)

ETL_nip = pd.DataFrame({"etl": etl, "etl_add": etl_add, "etl_pro": etl_pro})
ETL_nip

Unnamed: 0,etl,etl_add,etl_pro
0,16,116,3
1,16,116,14
2,16,116,13
3,16,8,3
4,16,9,3
...,...,...,...
163,18,0,13
164,19,116,10
165,19,72,10
166,10,116,14


In [12]:
# Index the nip ETL combinations by row position: {row number: [etl, etl_add, etl_pro]}.
etl_nip = ETL_nip.to_numpy().tolist()
etl_nip_dic = dict(enumerate(etl_nip))

etl_nip_dic

{0: [16, 116, 3],
 1: [16, 116, 14],
 2: [16, 116, 13],
 3: [16, 8, 3],
 4: [16, 9, 3],
 5: [16, 31, 14],
 6: [16, 22, 3],
 7: [16, 120, 3],
 8: [12, 116, 10],
 9: [12, 116, 16],
 10: [12, 116, 2],
 11: [12, 116, 5],
 12: [12, 116, 8],
 13: [12, 116, 14],
 14: [12, 116, 18],
 15: [12, 116, 0],
 16: [12, 116, 7],
 17: [12, 69, 10],
 18: [12, 111, 10],
 19: [12, 111, 16],
 20: [12, 111, 0],
 21: [12, 76, 10],
 22: [12, 35, 10],
 23: [12, 48, 10],
 24: [12, 44, 0],
 25: [12, 128, 10],
 26: [12, 118, 10],
 27: [12, 62, 10],
 28: [12, 43, 10],
 29: [12, 107, 10],
 30: [12, 107, 14],
 31: [12, 83, 16],
 32: [12, 63, 10],
 33: [12, 36, 10],
 34: [12, 97, 16],
 35: [12, 71, 10],
 36: [12, 57, 10],
 37: [12, 82, 2],
 38: [12, 99, 10],
 39: [12, 31, 10],
 40: [12, 117, 10],
 41: [12, 119, 10],
 42: [12, 79, 10],
 43: [12, 93, 10],
 44: [12, 100, 10],
 45: [12, 94, 2],
 46: [12, 81, 10],
 47: [12, 109, 10],
 48: [15, 31, 14],
 49: [15, 31, 17],
 50: [15, 31, 1],
 51: [15, 31, 6],
 52: [15, 31, 15

### pin

In [13]:
# pin devices (Cell_architecture == 1): list every combination of
# (ETL_stack_sequence, ETL_additives_compounds, ETL_deposition_procedure)
# that occurs in the data. Values are visited in order of first appearance at
# each nesting level, so rows are grouped by stack, then by additive.
data_pin = data[data["Cell_architecture"] == 1]
etl, etl_add, etl_pro = [], [], []
for stack in data_pin['ETL_stack_sequence'].unique():
    stack_rows = data_pin[data_pin['ETL_stack_sequence'] == stack]
    for additive in stack_rows['ETL_additives_compounds'].unique():
        additive_rows = stack_rows[stack_rows['ETL_additives_compounds'] == additive]
        for procedure in additive_rows['ETL_deposition_procedure'].unique():
            etl.append(stack)
            etl_add.append(additive)
            etl_pro.append(procedure)

ETL_pin = pd.DataFrame({"etl": etl, "etl_add": etl_add, "etl_pro": etl_pro})

# Index the combinations by row position: {row number: [etl, etl_add, etl_pro]}.
etl_pin = ETL_pin.to_numpy().tolist()
etl_pin_dic = dict(enumerate(etl_pin))

etl_pin_dic

{0: [4, 116, 11],
 1: [4, 116, 14],
 2: [2, 116, 10],
 3: [2, 116, 14],
 4: [2, 116, 11],
 5: [2, 39, 10],
 6: [2, 103, 10],
 7: [2, 51, 10],
 8: [2, 46, 10],
 9: [2, 45, 10],
 10: [2, 87, 10],
 11: [2, 86, 10],
 12: [2, 88, 10],
 13: [2, 95, 10],
 14: [2, 89, 10],
 15: [2, 49, 10],
 16: [2, 121, 10],
 17: [2, 64, 10],
 18: [2, 104, 10],
 19: [2, 106, 10],
 20: [2, 105, 10],
 21: [2, 41, 10],
 22: [2, 92, 10],
 23: [3, 116, 9],
 24: [3, 116, 14],
 25: [3, 116, 11],
 26: [3, 129, 14],
 27: [3, 52, 14],
 28: [3, 53, 14],
 29: [3, 90, 14],
 30: [3, 77, 14],
 31: [1, 116, 9],
 32: [1, 116, 8],
 33: [1, 116, 12],
 34: [8, 116, 14],
 35: [5, 116, 12],
 36: [6, 116, 11],
 37: [6, 50, 11],
 38: [6, 91, 11],
 39: [7, 116, 14],
 40: [17, 116, 18],
 41: [11, 116, 10],
 42: [12, 116, 10],
 43: [0, 116, 8],
 44: [0, 56, 8],
 45: [0, 55, 8],
 46: [0, 54, 8],
 47: [18, 116, 13],
 48: [16, 116, 14],
 49: [19, 116, 10],
 50: [19, 42, 10],
 51: [15, 127, 14]}

## HTL

### nip

In [14]:
# nip devices (Cell_architecture == 0): list every (HTL_stack_sequence,
# HTL_additives_compounds) pair that occurs in the data.
# HTL_deposition_procedure is not part of the combination.
htl, htl_add = [], []
for stack in data_nip['HTL_stack_sequence'].unique():
    stack_mask = data_nip['HTL_stack_sequence'] == stack
    additives = data_nip.loc[stack_mask, 'HTL_additives_compounds'].unique()
    # One output row per additive seen together with this stack.
    htl.extend([stack] * len(additives))
    htl_add.extend(additives)

HTL_nip = pd.DataFrame({"htl": htl, "htl_add": htl_add})

# Index the pairs by row position: {row number: [htl, htl_add]}.
htl_nip = HTL_nip.to_numpy().tolist()
htl_nip_dic = dict(enumerate(htl_nip))

htl_nip_dic

{0: [19, 83],
 1: [19, 80],
 2: [19, 114],
 3: [19, 70],
 4: [19, 71],
 5: [19, 72],
 6: [19, 58],
 7: [19, 75],
 8: [19, 27],
 9: [19, 12],
 10: [19, 13],
 11: [19, 51],
 12: [19, 61],
 13: [19, 53],
 14: [19, 19],
 15: [19, 52],
 16: [19, 79],
 17: [19, 24],
 18: [19, 25],
 19: [19, 26],
 20: [19, 55],
 21: [19, 109],
 22: [19, 77],
 23: [19, 2],
 24: [19, 82],
 25: [19, 22],
 26: [19, 69],
 27: [19, 41],
 28: [19, 104],
 29: [19, 4],
 30: [19, 3],
 31: [19, 85],
 32: [19, 48],
 33: [19, 87],
 34: [19, 37],
 35: [19, 38],
 36: [19, 39],
 37: [19, 40],
 38: [19, 106],
 39: [19, 50],
 40: [19, 68],
 41: [19, 10],
 42: [19, 76],
 43: [19, 33],
 44: [19, 23],
 45: [19, 56],
 46: [19, 54],
 47: [19, 57],
 48: [19, 81],
 49: [19, 78],
 50: [19, 59],
 51: [19, 60],
 52: [19, 21],
 53: [19, 34],
 54: [19, 32],
 55: [16, 80],
 56: [16, 114],
 57: [16, 48],
 58: [16, 77],
 59: [16, 73],
 60: [16, 75],
 61: [16, 110],
 62: [16, 96],
 63: [3, 114],
 64: [13, 80],
 65: [13, 114],
 66: [13, 75],
 

### pin

In [15]:
# pin devices (Cell_architecture == 1): list every (HTL_stack_sequence,
# HTL_additives_compounds) pair that occurs in the data.
# HTL_deposition_procedure is not part of the combination, matching the nip
# cell above.
htl = []
htl_add = []
for index in data_pin['HTL_stack_sequence'].unique():
    add = data_pin[data_pin['HTL_stack_sequence'] == index]['HTL_additives_compounds'].unique()
    for i in add:
        htl.append(index)
        htl_add.append(i)

HTL_pin = pd.DataFrame({"htl": htl, "htl_add": htl_add})

# Index the pairs by row position: {row number: [htl, htl_add]}.
htl_pin = HTL_pin.to_numpy().tolist()
htl_pin_dic = dict(enumerate(htl_pin))

htl_pin_dic

{0: [16, 114],
 1: [16, 103],
 2: [16, 48],
 3: [16, 92],
 4: [16, 49],
 5: [16, 108],
 6: [15, 114],
 7: [15, 93],
 8: [15, 80],
 9: [15, 91],
 10: [15, 17],
 11: [15, 118],
 12: [15, 111],
 13: [15, 5],
 14: [15, 64],
 15: [15, 7],
 16: [15, 98],
 17: [15, 95],
 18: [15, 11],
 19: [15, 88],
 20: [15, 42],
 21: [15, 97],
 22: [15, 89],
 23: [15, 44],
 24: [15, 65],
 25: [15, 120],
 26: [15, 28],
 27: [15, 105],
 28: [15, 15],
 29: [15, 113],
 30: [7, 30],
 31: [7, 114],
 32: [7, 117],
 33: [7, 20],
 34: [7, 75],
 35: [7, 74],
 36: [7, 6],
 37: [7, 8],
 38: [7, 115],
 39: [7, 102],
 40: [7, 9],
 41: [7, 99],
 42: [7, 84],
 43: [7, 29],
 44: [7, 86],
 45: [7, 16],
 46: [7, 107],
 47: [7, 14],
 48: [7, 46],
 49: [7, 116],
 50: [7, 112],
 51: [7, 18],
 52: [7, 94],
 53: [7, 43],
 54: [7, 45],
 55: [9, 114],
 56: [9, 119],
 57: [9, 30],
 58: [9, 20],
 59: [9, 62],
 60: [12, 114],
 61: [8, 114],
 62: [8, 31],
 63: [8, 0],
 64: [19, 80],
 65: [19, 114],
 66: [19, 58],
 67: [13, 114],
 68: [5

## PVK

In [16]:
# MAPbI3,{Perovskite_additives_compounds,Perovskite_deposition_procedure,Perovskite_deposition_quenching_media,Perovskite_deposition_quenching_media_additives_compounds}
#
# For every (perovskite additive, quenching medium) pair, emit one row per
# combination of
#   - a quenching-media additive seen with that medium, and
#   - a deposition procedure seen with that medium AND that perovskite additive.
# NOTE(review): the quenching-media additive is matched on the medium only, so
# a row can pair an additive and a procedure that never occur together in the
# data — confirm this is intended.
quenching_col = 'Perovskite_deposition_quenching_media'
media_values = data[quenching_col].unique()

# Per-medium row masks and quenching-media additives depend only on the
# medium, so they are computed once instead of once per perovskite additive.
media_masks = {media: data[quenching_col] == media for media in media_values}
media_additives = {
    media: data.loc[media_masks[media], 'Perovskite_deposition_quenching_media_additives_compounds'].unique()
    for media in media_values
}

pvk_add = []
pvkm = []
pvkm_add = []
pvk_pro = []
for add in data['Perovskite_additives_compounds'].unique():
    add_mask = data['Perovskite_additives_compounds'] == add
    for index in media_values:
        # The procedures do not depend on the quenching-media additive, so
        # they are looked up once per (additive, medium) pair rather than
        # inside the loop over quenching-media additives.
        pro = data.loc[media_masks[index] & add_mask, 'Perovskite_deposition_procedure'].unique()
        for i in media_additives[index]:
            for j in pro:
                pvk_add.append(add)
                pvkm.append(index)
                pvkm_add.append(i)
                pvk_pro.append(j)


PVK = pd.DataFrame({"pvk_add":pvk_add,"pvk_pro":pvk_pro,"pvkm": pvkm, "pvkm_add": pvkm_add})

# Index the rows by position: {row number: [pvk_add, pvk_pro, pvkm, pvkm_add]}.
pvkm_list = PVK.to_numpy().tolist()
pvkm_list_dic = dict(enumerate(pvkm_list))

pvkm_list_dic

{0: [83, 11, 8, 8],
 1: [83, 16, 8, 8],
 2: [83, 6, 8, 8],
 3: [83, 12, 8, 8],
 4: [83, 10, 8, 8],
 5: [83, 2, 8, 8],
 6: [83, 1, 8, 8],
 7: [83, 7, 8, 8],
 8: [83, 14, 8, 8],
 9: [83, 18, 8, 8],
 10: [83, 9, 8, 8],
 11: [83, 5, 8, 8],
 12: [83, 0, 8, 8],
 13: [83, 11, 2, 8],
 14: [83, 16, 2, 8],
 15: [83, 12, 2, 8],
 16: [83, 0, 2, 8],
 17: [83, 11, 2, 7],
 18: [83, 16, 2, 7],
 19: [83, 12, 2, 7],
 20: [83, 0, 2, 7],
 21: [83, 11, 2, 6],
 22: [83, 16, 2, 6],
 23: [83, 12, 2, 6],
 24: [83, 0, 2, 6],
 25: [83, 11, 2, 5],
 26: [83, 16, 2, 5],
 27: [83, 12, 2, 5],
 28: [83, 0, 2, 5],
 29: [83, 11, 2, 4],
 30: [83, 16, 2, 4],
 31: [83, 12, 2, 4],
 32: [83, 0, 2, 4],
 33: [83, 11, 2, 2],
 34: [83, 16, 2, 2],
 35: [83, 12, 2, 2],
 36: [83, 0, 2, 2],
 37: [83, 11, 2, 3],
 38: [83, 16, 2, 3],
 39: [83, 12, 2, 3],
 40: [83, 0, 2, 3],
 41: [83, 11, 4, 8],
 42: [83, 11, 3, 8],
 43: [83, 11, 3, 0],
 44: [83, 11, 3, 3],
 45: [83, 11, 3, 1],
 46: [83, 11, 7, 8],
 47: [83, 16, 9, 8],
 48: [83, 11, 9,

# Reverse Design

In [17]:
# Print the largest (encoded) value of every selected feature.
selected_features = X[best_features]
for column in selected_features.columns:
    column_max = max(selected_features[column])
    print(f'column:{column},max:{column_max}')

column:Cell_architecture,max:1
column:Substrate_stack_sequence,max:1
column:ETL_stack_sequence,max:19
column:ETL_additives_compounds,max:129
column:ETL_deposition_procedure,max:19
column:Perovskite_composition_none_stoichiometry_components_in_excess,max:12
column:Perovskite_additives_compounds,max:340
column:Perovskite_deposition_procedure,max:19
column:DMF_DMSO,max:1.0
column:Perovskite_deposition_quenching_media,max:9
column:HTL_stack_sequence,max:19
column:HTL_additives_compounds,max:120
column:Backcontact_stack_sequence,max:8
column:Backcontact_thickness_list,max:550.0
column:Perovskite_deposition_solvents_IPA,max:1
column:Perovskite_deposition_thermal_annealing,max:455


## nip

In [18]:
class Error(Exception):
    """Custom exception type; nothing in this cell raises it (kept so the name stays defined)."""
    pass


# Filled by `ft_optuna_objective` for every trial whose predicted probability
# exceeds 0.9: `prob_params` receives the one-row feature frame and `prob` the
# matching one-element probability array.
prob_params = []
prob = []


def ft_optuna_objective(trial):
    """Optuna objective for nip devices (Cell_architecture fixed to 0).

    The trial chooses one row from each of the ETL, perovskite and HTL
    combination tables (`etl_nip_dic`, `pvkm_list_dic`, `htl_nip_dic`) plus the
    remaining features independently. The resulting device is scored with
    `voting_clf`.

    Parameters
    ----------
    trial : object exposing ``suggest_int(name, low, high)`` and
        ``suggest_float(name, low, high)`` (an Optuna trial).

    Returns
    -------
    float
        Predicted probability of class 1 (``pce == 1``) for the sampled device.

    Raises
    ------
    KeyError
        If a column of ``X_fit`` has no corresponding sampled feature.

    Side effects: appends to the module-level ``prob`` / ``prob_params`` lists
    when the probability is above 0.9.
    """
    # Row indices into the combination tables. Both bounds are inclusive, so
    # the upper bound is the last valid key; deriving it from the table size
    # keeps the search space in step with the data instead of relying on
    # hand-copied row counts.
    ETL = trial.suggest_int("ETL", 0, len(etl_nip_dic) - 1)
    PVK = trial.suggest_int("PVK", 0, len(pvkm_list_dic) - 1)
    HTL = trial.suggest_int("HTL", 0, len(htl_nip_dic) - 1)

    ETL_stack_sequence, ETL_additives_compounds, ETL_deposition_procedure = etl_nip_dic[ETL][:3]
    # Only the first three entries of the perovskite row are model features;
    # the fourth (quenching-media additive) is not used here.
    (Perovskite_additives_compounds,
     Perovskite_deposition_procedure,
     Perovskite_deposition_quenching_media) = pvkm_list_dic[PVK][:3]
    HTL_stack_sequence, HTL_additives_compounds = htl_nip_dic[HTL][:2]

    Cell_architecture = 0   # nip

    # Independently sampled features. The trial parameter is still called
    # "DMF_DMS0" (digit zero) so the keys of the study's reported params do
    # not change; the model feature it feeds is "DMF_DMSO".
    Substrate_stack_sequence = trial.suggest_int("Substrate_stack_sequence", 0, 1)
    Perovskite_composition_none_stoichiometry_components_in_excess = trial.suggest_int("Perovskite_composition_none_stoichiometry_components_in_excess", 0, 12)
    DMF_DMSO = trial.suggest_float("DMF_DMS0", 0, 1)
    Backcontact_stack_sequence = trial.suggest_int("Backcontact_stack_sequence", 0, 8)
    Backcontact_thickness_list = trial.suggest_float("Backcontact_thickness_list", 0, 550)
    Perovskite_deposition_solvents_IPA = trial.suggest_int("Perovskite_deposition_solvents_IPA", 0, 1)
    Perovskite_deposition_thermal_annealing = trial.suggest_int("Perovskite_deposition_thermal_annealing", 0, 455)

    params = {
        'Cell_architecture': Cell_architecture,
        'Substrate_stack_sequence': Substrate_stack_sequence,
        'ETL_stack_sequence': ETL_stack_sequence,
        'ETL_additives_compounds': ETL_additives_compounds,
        'ETL_deposition_procedure': ETL_deposition_procedure,
        'Perovskite_composition_none_stoichiometry_components_in_excess': Perovskite_composition_none_stoichiometry_components_in_excess,
        'Perovskite_additives_compounds': Perovskite_additives_compounds,
        'Perovskite_deposition_procedure': Perovskite_deposition_procedure,
        'DMF_DMSO': DMF_DMSO,
        'Perovskite_deposition_quenching_media': Perovskite_deposition_quenching_media,
        'HTL_stack_sequence': HTL_stack_sequence,
        'HTL_additives_compounds': HTL_additives_compounds,
        'Backcontact_stack_sequence': Backcontact_stack_sequence,
        'Backcontact_thickness_list': Backcontact_thickness_list,
        'Perovskite_deposition_solvents_IPA': Perovskite_deposition_solvents_IPA,
        'Perovskite_deposition_thermal_annealing': Perovskite_deposition_thermal_annealing,
    }

    # Align the sampled values with the training columns BY NAME (a missing
    # feature raises KeyError instead of silently shifting values), then cast
    # to float so the model receives an all-float row.
    params_list = pd.DataFrame([params])[list(X_fit.columns)].astype(float)
    y_prob = voting_clf.predict_proba(params_list)[:, 1]

    # Remember promising candidates. The one-element array is stored as-is
    # because the result-processing cell unpacks it with `item[0]`.
    if y_prob[0] > 0.9:
        prob.append(y_prob)
        prob_params.append(params_list)

    # Hand back a plain float rather than a one-element array.
    return float(y_prob[0])

In [19]:
def optimizer_optuna(n_trials, algo, seed=None):
    """Maximise the module-level ``ft_optuna_objective`` with Optuna.

    The objective is looked up by name when this function is called, so the
    most recently executed definition of ``ft_optuna_objective`` is the one
    that gets optimised.

    Parameters
    ----------
    n_trials : int
        Number of trials to run.
    algo : str
        ``"TPE"`` for the TPE sampler or ``"GP"`` for the scikit-optimize
        Gaussian-process sampler.
    seed : int, optional
        Seed forwarded to the sampler so a run can be repeated. ``None``
        (default) leaves the sampler unseeded, as before.

    Returns
    -------
    tuple
        ``(best_trial.params, best_trial.values)`` of the finished study.

    Raises
    ------
    ValueError
        If ``algo`` is neither ``"TPE"`` nor ``"GP"``.
    """
    if algo == "TPE":
        sampler = optuna.samplers.TPESampler(n_startup_trials=10, n_ei_candidates=24, seed=seed)
    elif algo == "GP":
        # Imported lazily: these optional packages are only needed for "GP".
        from optuna.integration import SkoptSampler
        import skopt  # noqa: F401 -- imported only to fail early when scikit-optimize is missing

        skopt_kwargs = {'base_estimator': 'GP',
                        'n_initial_points': 10,
                        'acq_func': 'EI'}
        if seed is not None:
            skopt_kwargs['random_state'] = seed
        sampler = SkoptSampler(skopt_kwargs=skopt_kwargs)
    else:
        # Previously an unknown name was passed through as the sampler and
        # only failed later, inside Optuna, with an unrelated error.
        raise ValueError(f"Unknown algo {algo!r}; expected 'TPE' or 'GP'.")

    study = optuna.create_study(sampler=sampler, direction="maximize")

    study.optimize(ft_optuna_objective,
                   n_trials=n_trials,
                   show_progress_bar=True)

    print("\n","\n","best params: ", study.best_trial.params,
          "\n","\n","best score: ", study.best_trial.values,
          "\n")

    return study.best_trial.params, study.best_trial.values

In [20]:
# Only show Optuna log messages of level ERROR and above.
optuna.logging.set_verbosity(optuna.logging.ERROR) 
# Run 2000 TPE trials against the currently defined objective and keep the
# best trial's parameters and score.
best_nip_params, best_nip_prob = optimizer_optuna(2000,"TPE")

  0%|          | 0/2000 [00:00<?, ?it/s]


 
 best params:  {'ETL': 26, 'PVK': 645, 'HTL': 62, 'Substrate_stack_sequence': 0, 'Perovskite_composition_none_stoichiometry_components_in_excess': 9, 'DMF_DMS0': 0.9706586128458718, 'Backcontact_stack_sequence': 1, 'Backcontact_thickness_list': 114.37485070686964, 'Perovskite_deposition_solvents_IPA': 0, 'Perovskite_deposition_thermal_annealing': 13} 
 
 best score:  [0.9707129069110587] 



In [21]:
# Column names for the nip result table: the model features in training
# order, followed by the predicted probability.
col_nip = [
    'Cell_architecture',
    'Substrate_stack_sequence',
    'ETL_stack_sequence',
    'ETL_additives_compounds',
    'ETL_deposition_procedure',
    'Perovskite_composition_none_stoichiometry_components_in_excess',
    'Perovskite_additives_compounds',
    'Perovskite_deposition_procedure',
    'DMF_DMSO',
    'Perovskite_deposition_quenching_media',
    'HTL_stack_sequence',
    'HTL_additives_compounds',
    'Backcontact_stack_sequence',
    'Backcontact_thickness_list',
    'Perovskite_deposition_solvents_IPA',
    'Perovskite_deposition_thermal_annealing',
    'prob',
]

In [22]:
# Stack the one-row frames recorded by the objective into a single table.
if len(prob_params) > 0:
    df_prob_params = pd.concat(prob_params, ignore_index=True)
else:
    print("No data in prob_params.")
    # Fall back to an empty table with the feature columns so the rest of the
    # cell still runs instead of failing on an undefined `df_prob_params`.
    df_prob_params = pd.DataFrame(columns=col_nip[:-1])

# NOTE: `prob_params` and `prob` are rebound here (list -> DataFrame, list of
# one-element arrays -> list of scalars), so this cell is meant to run once
# per optimisation run.
prob_params = pd.DataFrame(df_prob_params)
prob = [item[0] for item in prob]
prob_params['prob']=prob
prob_params.columns = col_nip

# Translate encoded values back to their original labels wherever a
# module-level "<column>_dic" mapping exists; values without an entry, and
# columns without a mapping, are left unchanged.
for column in prob_params.keys():
    mapping = globals().get(column + "_dic")
    if mapping is not None:
        prob_params[column] = [mapping.get(value, value) for value in prob_params[column]]
prob_params

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,HTL_stack_sequence,HTL_additives_compounds,Backcontact_stack_sequence,Backcontact_thickness_list,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing,prob
0,nip,SLG | FTO,TiO2-c,rGraphene oxide,Spin-coating,PbI2; PbBr2,PbAc2,Spin-coating,0.413749,Diethyl ether,Spiro-MeOTAD,Li-TFSI; TPB,MoO3 | Al,103.837946,0,100.0-70.0,0.931267
1,nip,SLG | ITO,TiO2-c,Zn,Spin-coating,PbI2; PbBr2,Caffeine,Spin-coating,0.594520,Diethyl ether,Spiro-MeOTAD,JQ3; Li-TFSI; TBP,MoO3 | Al,94.324074,0,100; 100-10.0; 60.0,0.903249
2,nip,SLG | ITO,TiO2-c,CF3NaO2S,Spin-coating,PbI2; PbBr2,TFBA,Spin-coating,0.789716,Diethyl ether,Spiro-MeOTAD,Li-TFSI; TBP,MoO3 | Al,423.643550,0,100.0-100.0,0.918312
3,nip,SLG | ITO,TiO2-c,TOPD,Spin-coating,PbI2; PbBr2,PbSCN2,Spin-coating,0.968268,Diethyl ether,Spiro-MeOTAD,Li-TFSI; TPB,MoO3 | Al,200.480113,0,100.0; 120.0-90.0; 15.0,0.902496
4,nip,SLG | FTO,TiO2-c,Zn,Spin-coating,PbI2; PbBr2,EC,Spin-coating,0.936300,Diethyl ether,Spiro-MeOTAD,JQ1; Li-TFSI; TBP,MoO3 | Al,453.624393,0,100.0-600.0,0.939985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,nip,SLG | FTO,SnO2-c,Bi2O2S-np,Spin-coating,NH4Cl,Cl; DPE,Spin-coating >> CBD,0.796446,Chlorobenzene,PTAA,TPFB,Ag,154.966131,0,25; 60-20.0; 40.0,0.903983
280,nip,SLG | FTO,SnO2-np,Zr,Spin-coating,NH4Cl,Cs,Spin-coating,0.839664,Diethyl ether,PTAA,Li-TFSI,AgAl,163.357421,0,70.0-20.0,0.944395
281,nip,SLG | FTO,SnO2-c,Ti3C2,Spin-coating,NH4Cl,ITIC,Spin-coating,0.859713,Chlorobenzene,PTAA,Li-TFSI; Polystyrene; TBP,AgAl,223.421049,0,60.0-2.0,0.952053
282,nip,SLG | FTO,SnO2-np,Zr,Spin-coating,NH4Cl,ITIC; PCBM-60,Spin-coating,0.806611,Chlorobenzene,PTAA,LAD,AgAl,402.340067,0,150; 100-5.0; 20.0,0.945167


In [23]:
# Rank the recorded nip candidates by predicted probability, highest first,
# renumber the rows and show the first 20.
prob_params_nip = (
    prob_params
    .sort_values(by='prob', ascending=False)
    .reset_index(drop=True)
)
prob_params_nip.head(20)

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,HTL_stack_sequence,HTL_additives_compounds,Backcontact_stack_sequence,Backcontact_thickness_list,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing,prob
0,nip,SLG | FTO,TiO2-c,Zn,Spin-coating,PbI2,PAA,Spin-coating,0.970659,Diethyl ether,PTAA,O2,AgAl,114.374851,0,100.0-120.0,0.970713
1,nip,SLG | FTO,SnO2-c,SnCl2; MercaptoPropionic acid; Urea,Spin-coating,NH4Cl,DMI,Spin-coating,0.766414,Diethyl ether,PTAA,Li-TFSI,AgAl,317.410626,0,100.0-50.0,0.966778
2,nip,SLG | FTO,TiO2-c,TOPD,Spin-coating | Spin-coating,PbI2,PU,Spin-coating,0.972307,Diethyl ether,PTAA,O2,AgAl,136.427656,0,100.0-15.0,0.963339
3,nip,SLG | FTO,SnO2-c,Undoped,Evaporation,NH4Cl,Rb,Spin-coating,0.766852,Diethyl ether,PTAA,Li-TFSI; Polystyrene; TBP,Ag,427.409895,0,100.0-30.0,0.962446
4,nip,SLG | FTO,SnO2-c,Undoped,Spin-coating,Pb,DPSI,Spin-coating,0.798483,Toluene,PTAA,O2,AgAl,365.711408,0,100.0-240.0,0.96179
5,nip,SLG | FTO,TiO2-c,Galliumnitrate聽hydrate,Spin-coating,PbI2,PAA,Spin-coating,0.973323,Diethyl ether,PTAA,O2,AgAl,118.635581,0,100.0-100.0,0.961558
6,nip,SLG | FTO,SnO2-c,Ti3C2,Spin-coating,NH4Cl,DA2PbI4,Spin-coating,0.877795,Chlorobenzene,PTAA,LAD,AgAl,283.109012,0,65.0; 100.0-2.0; 4.0,0.961545
7,nip,SLG | FTO,TiO2-c | PCBM-60,Undoped,Spin-coating | Spin-coating,NH4Cl,E2CA,Spin-coating,0.743396,Chlorobenzene,PTAA,LAD,AgAl,378.69516,0,100.0-240.0,0.961025
8,nip,SLG | FTO,SnO2-c,La,Spin-coating,NH4Cl,HATNA,Spin-coating,0.710715,Diethyl ether,PTAA,Li-TFSI,Ag,404.397925,0,100.0-150.0,0.959123
9,nip,SLG | FTO,SnO2-c,SnCl2; MercaptoPropionic acid; Urea,Spin-coating,NH4Cl,PMA,Spin-coating,0.884399,Diethyl ether,PTAA,Li-TFSI; Polystyrene; TBP,AgAl,164.321683,0,60.0; 100.0-5.0; 10.0,0.958981


## pin

In [24]:
class Error(Exception):
    pass
prob_params = []
prob = []
def ft_optuna_objective(trial):
    """Optuna objective for the pin search: score one sampled device recipe.

    The trial samples three joint indices (``ETL``, ``PVK``, ``HTL``) that are
    expanded through ``etl_pin_dic``, ``pvkm_list_dic`` and ``htl_pin_dic`` into
    several encoded features each, plus the remaining features individually.
    ``Cell_architecture`` is fixed to 1 (pin).  The encoded row is scored with
    ``voting_clf.predict_proba`` and column 1 of the result is the objective
    (presumably the positive class -- confirm against how ``voting_clf`` was fit).

    Side effects: when the score exceeds 0.9, the raw one-element probability
    array is appended to the global ``prob`` list and the one-row parameter
    frame to the global ``prob_params`` list.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample every search dimension.

    Returns
    -------
    float
        The predicted probability for the sampled recipe, as a plain Python
        float.  (Returning the one-element array instead relies on implicit
        array-to-scalar conversion, which NumPy has deprecated.)

    Raises
    ------
    optuna.TrialPruned
        If the sampling/lookup section raises ``Error``.
    """
    try:
        # Joint indices: each one selects a whole tuple of encoded features.
        # ``step`` is left at its default of 1; it is keyword-only in current
        # Optuna signatures, so it is no longer passed positionally.
        ETL = trial.suggest_int("ETL", 0, 51)
        PVK = trial.suggest_int("PVK", 0, 1386)
        HTL = trial.suggest_int("HTL", 0, 81)
        ETL_stack_sequence = etl_pin_dic[ETL][0]
        ETL_additives_compounds = etl_pin_dic[ETL][1]
        ETL_deposition_procedure = etl_pin_dic[ETL][2]
        Perovskite_additives_compounds = pvkm_list_dic[PVK][0]
        Perovskite_deposition_procedure = pvkm_list_dic[PVK][1]
        Perovskite_deposition_quenching_media = pvkm_list_dic[PVK][2]
        HTL_stack_sequence = htl_pin_dic[HTL][0]
        HTL_additives_compounds = htl_pin_dic[HTL][1]

        Cell_architecture = 1   # pin
        Substrate_stack_sequence = trial.suggest_int("Substrate_stack_sequence", 0, 1)
        Perovskite_composition_none_stoichiometry_components_in_excess = trial.suggest_int("Perovskite_composition_none_stoichiometry_components_in_excess", 0, 12)
        # NOTE(review): the name ends in the digit 0 ("DMS0"), not the letter O.
        # It is kept as-is because it is the key under which Optuna reports the
        # parameter in ``study.best_trial.params``.
        DMF_DMS0 = trial.suggest_float("DMF_DMS0", 0, 1)
        Backcontact_stack_sequence = trial.suggest_int("Backcontact_stack_sequence", 0, 8)
        Backcontact_thickness_list = trial.suggest_float("Backcontact_thickness_list", 0, 550)
        Perovskite_deposition_solvents_IPA = trial.suggest_int("Perovskite_deposition_solvents_IPA", 0, 1)
        Perovskite_deposition_thermal_annealing = trial.suggest_int("Perovskite_deposition_thermal_annealing", 0, 455)

        # Insertion order matters: the values are matched to ``X_fit.columns``
        # by position below, and the keys themselves are discarded.
        params = {
            'Cell_architecture':Cell_architecture,
            'Substrate_stack_sequence':Substrate_stack_sequence,
            'ETL_stack_sequence':ETL_stack_sequence,
            'ETL_additives_compounds':ETL_additives_compounds,
            'ETL_deposition_procedure':ETL_deposition_procedure,
            'Perovskite_composition_none_stoichiometry_components_in_excess':Perovskite_composition_none_stoichiometry_components_in_excess,
            'Perovskite_additives_compounds':Perovskite_additives_compounds,
            'Perovskite_deposition_procedure':Perovskite_deposition_procedure,
            'DMF_DMS0':DMF_DMS0,
            'Perovskite_deposition_quenching_media':Perovskite_deposition_quenching_media,
            'HTL_stack_sequence':HTL_stack_sequence,
            'HTL_additives_compounds':HTL_additives_compounds,
            'Backcontact_stack_sequence':Backcontact_stack_sequence,
            'Backcontact_thickness_list':Backcontact_thickness_list,
            'Perovskite_deposition_solvents_IPA':Perovskite_deposition_solvents_IPA,
            'Perovskite_deposition_thermal_annealing':Perovskite_deposition_thermal_annealing
        }

    except Error:
        # NOTE(review): nothing visible in the try-body raises ``Error``, so this
        # handler looks unreachable; a bad index would surface as KeyError or
        # IndexError and abort the study instead.  Confirm the intent.
        raise optuna.TrialPruned()

    # One-row frame whose columns are taken positionally from the training frame.
    params_list = pd.Series(params).values.reshape(1,-1)
    params_list = pd.DataFrame(params_list, columns=X_fit.columns)
    y_prob = voting_clf.predict_proba(params_list)[:,1]
    score = float(y_prob[0])

    # Keep high-scoring recipes.  ``prob`` still receives the raw one-element
    # array because the downstream decoding cell unpacks it with ``item[0]``.
    if score > 0.9:
        prob.append(y_prob)
        prob_params.append(params_list)
    return score

In [25]:
def optimizer_optuna(n_trials, algo, seed=None):
    """Run an Optuna study that maximises ``ft_optuna_objective``.

    Parameters
    ----------
    n_trials : int
        Number of trials passed to ``study.optimize``.
    algo : str or sampler object
        ``"TPE"`` builds a ``TPESampler`` (10 startup trials, 24 EI candidates);
        ``"GP"`` builds a ``SkoptSampler`` with a GP base estimator, 10 initial
        points and the EI acquisition function.  Any non-string value is passed
        to ``optuna.create_study`` unchanged as the sampler.
    seed : int, optional
        Seed for the sampler so that a run can be repeated.  The default
        ``None`` leaves the sampler unseeded, as before this parameter existed.

    Returns
    -------
    tuple
        ``(study.best_trial.params, study.best_trial.values)``.

    Raises
    ------
    ValueError
        If ``algo`` is a string other than ``"TPE"`` or ``"GP"``.  Previously
        such a string was handed to Optuna as if it were a sampler.
    """
    if algo == "TPE":
        sampler = optuna.samplers.TPESampler(n_startup_trials=10,
                                             n_ei_candidates=24,
                                             seed=seed)
    elif algo == "GP":
        # Imported lazily so the optional scikit-optimize integration is only
        # required when the GP sampler is actually requested.
        from optuna.integration import SkoptSampler
        skopt_kwargs = {'base_estimator':'GP',
                        'n_initial_points':10,
                        'acq_func':'EI'}
        if seed is not None:
            # Forwarded to the underlying skopt optimizer.
            skopt_kwargs['random_state'] = seed
        sampler = SkoptSampler(skopt_kwargs=skopt_kwargs)
    elif isinstance(algo, str):
        raise ValueError(
            "Unknown algo %r: expected 'TPE', 'GP' or a sampler object." % (algo,)
        )
    else:
        # Caller supplied a ready-made sampler object; use it as given.
        sampler = algo

    study = optuna.create_study(sampler=sampler, direction="maximize")
    study.optimize(ft_optuna_objective, n_trials=n_trials, show_progress_bar=True)

    print("\n","\n","best params: ", study.best_trial.params,
          "\n","\n","best score: ", study.best_trial.values,
          "\n")

    return study.best_trial.params, study.best_trial.values

In [26]:
# Only let Optuna report errors, then run the pin search:
# 2000 trials with the "TPE" sampler configuration.
optuna.logging.set_verbosity(optuna.logging.ERROR)
best_pin_params, best_pin_prob = optimizer_optuna(n_trials=2000, algo="TPE")

  0%|          | 0/2000 [00:00<?, ?it/s]


 
 best params:  {'ETL': 35, 'PVK': 1092, 'HTL': 3, 'Substrate_stack_sequence': 1, 'Perovskite_composition_none_stoichiometry_components_in_excess': 11, 'DMF_DMS0': 0.7524417934134233, 'Backcontact_stack_sequence': 1, 'Backcontact_thickness_list': 95.52005388477407, 'Perovskite_deposition_solvents_IPA': 0, 'Perovskite_deposition_thermal_annealing': 269} 
 
 best score:  [0.9745784414834437] 



In [27]:
# Column labels applied to the pin result frame: the 16 feature columns in
# order, followed by the predicted probability.
col_pin = [
    "Cell_architecture",
    "Substrate_stack_sequence",
    "ETL_stack_sequence",
    "ETL_additives_compounds",
    "ETL_deposition_procedure",
    "Perovskite_composition_none_stoichiometry_components_in_excess",
    "Perovskite_additives_compounds",
    "Perovskite_deposition_procedure",
    "DMF_DMSO",
    "Perovskite_deposition_quenching_media",
    "HTL_stack_sequence",
    "HTL_additives_compounds",
    "Backcontact_stack_sequence",
    "Backcontact_thickness_list",
    "Perovskite_deposition_solvents_IPA",
    "Perovskite_deposition_thermal_annealing",
    "prob",
]

In [28]:
# Stack the one-row frames collected by the objective into a single frame.
if len(prob_params) > 0:
    df_prob_params = pd.concat(prob_params, ignore_index=True)
else:
    print("No data in prob_params.")
    # Fall back to an empty frame with the feature columns.  Without this,
    # ``df_prob_params`` is undefined here (NameError), or silently reuses a
    # value left over from an earlier cell, if one defined the same name.
    df_prob_params = pd.DataFrame(columns=col_pin[:-1])

# NOTE: ``prob_params`` and ``prob`` are rebound from the raw accumulators to
# their processed forms (later cells read ``prob_params`` as a DataFrame), so
# this cell is meant to run once per search; re-run the objective cell first.
prob_params = pd.DataFrame(df_prob_params)
prob = [item[0] for item in prob]   # unwrap the one-element probability arrays
prob_params['prob'] = prob
prob_params.columns = col_pin

# Decode encoded values back to labels wherever a global "<column>_dic" mapping
# exists; values without an entry, and columns without a mapping, are left as is.
for column in prob_params.columns:
    decoder = globals().get(column + "_dic")
    if decoder is not None:
        prob_params[column] = [decoder.get(value, value) for value in prob_params[column]]
prob_params

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,HTL_stack_sequence,HTL_additives_compounds,Backcontact_stack_sequence,Backcontact_thickness_list,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing,prob
0,pin,SLG | ITO,PCBM-60 | BCP,DMOAP,Spin-coating | Spin-coating,Stoichiometric,Mn,Spin-coating,0.918762,Chlorobenzene,PTAA,F8BT,AgAl,273.243958,0,60; 80-1.0; 2.0,0.900839
1,pin,SLG | ITO,PCBM-60 | ZnO-np,Undoped,Spin-coating | Spin-coating,none,Phenylethylammonium iodide,Spin-coating,0.940417,Diethyl ether,PTAA,PolyTPD,AgAl,368.981191,0,65; 100-2.0; 2.0,0.939704
2,pin,SLG | ITO,PCBM-60 | bis-C60,Undoped,Spin-coating | Spin-coating,PbI2; PbBr2,Cs,Spin-coating,0.906358,Diethyl ether,PTAA,F4-TCNQ,AgAl,224.437778,0,180.0-10.0,0.925619
3,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,PbI2,Cl; DPE,Spin-coating >> CBD,0.752266,Chlorobenzene,PTAA,NPB,AgAl,361.713456,0,80.0-180.0,0.905628
4,pin,SLG | ITO,PCBM-60 | bis-C60,Undoped,Spin-coating | Spin-coating,none,PMA,Spin-coating,0.979717,Diethyl ether,PTAA,TAPC,AgAl,323.890174,0,90-30.0,0.940487
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400,pin,SLG | ITO,C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,PbI2; PbBr2,HATNA,Spin-coating,0.785070,Diethyl ether,PTAA,F8BT,AgAl,95.335625,0,60; 100-2.0; 10.0,0.939900
401,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,DA2PbI4,Drop-infiltration,0.916023,Chlorobenzene,PTAA,PolyTPD,AgAl,131.678064,0,65.0-20.0,0.939860
402,pin,SLG | ITO,PCBM-60 | LiF,PNDI-2T,Spin-coating | Evaporation,Stoichiometric,Caffeine,Spin-coating,0.264875,Diethyl ether,PTAA,NPB,Al,78.018122,0,60; 100-45.0; 50.0,0.938325
403,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,Diiodooctane,Spin-coating >> CBD,0.846651,Chlorobenzene,PTAA,TAPC,AgAl,155.053023,0,115.0-30.0,0.922535


In [29]:
# Rank the pin candidates from highest to lowest predicted probability and
# renumber the rows, then show the top of the ranking.
prob_params_pin = (
    prob_params
    .sort_values(by='prob', ascending=False)
    .reset_index(drop=True)
)
prob_params_pin.head(20)

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,HTL_stack_sequence,HTL_additives_compounds,Backcontact_stack_sequence,Backcontact_thickness_list,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing,prob
0,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,PMA,Spin-coating,0.752442,Diethyl ether,PTAA,NPB,AgAl,95.520054,0,60; 100-5.0; 10.0,0.974578
1,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,Stoichiometric,Cs,Spin-coating,0.746834,Diethyl ether,PTAA,NPB,Al,103.190626,0,60; 100-0.5; 5.0,0.969979
2,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,Stoichiometric,NMA,Spin-coating,0.752106,Diethyl ether,PTAA,TAPC,AgAl,89.02847,0,60; 100-3.0; 20.0,0.969584
3,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,Stoichiometric,PMA,Spin-coating,0.776601,Diethyl ether,PTAA,NPB,Al,93.374074,0,60; 100-45.0; 50.0,0.969068
4,pin,SLG | ITO,PCBM-60 | LiF,PNDI-2T,Spin-coating | Evaporation,Stoichiometric,MA,Spin-coating,0.882389,Diethyl ether,PTAA,TAPC,Al,79.102843,0,65-2.0,0.966775
5,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,Cs,Spin-coating,0.707541,Diethyl ether,PTAA,NPB,Al,99.499056,0,60.0-45.0,0.965271
6,pin,SLG | ITO,PCBM-60 | LiF,DIO,Spin-coating | Evaporation,Stoichiometric,Cs,Spin-coating,0.826797,Diethyl ether,PTAA,TAPC,AgAl,77.244341,0,40; 100-3.0; 10.0,0.963194
7,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,Diiodooctane,Spin-coating >> CBD,0.748596,Chlorobenzene,PTAA,NPB,AgAl,95.356217,0,60; 80-2.0; 5.0,0.961275
8,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,PbI2; PbBr2,Diiodooctane,Spin-coating,0.897588,Chlorobenzene,PTAA,TAPC,AgAl,72.515982,0,130-30.0,0.960297
9,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,NH4SCN,Spin-coating,0.822625,Chlorobenzene,PTAA,NPB,AgAl,95.545151,0,60; 100-10.0; 10.0,0.959266


In [30]:
# Merge the nip and pin candidate tables into one ranking ordered by predicted
# probability (highest first), then save it to CSV using the gbk encoding.
prob_params_all = (
    pd.concat([prob_params_nip, prob_params_pin])
    .sort_values(by='prob', ascending=False)
    .reset_index(drop=True)
)
prob_params_all.to_csv("prob_params_all.csv", encoding="gbk", index=False)
prob_params_all

Unnamed: 0,Cell_architecture,Substrate_stack_sequence,ETL_stack_sequence,ETL_additives_compounds,ETL_deposition_procedure,Perovskite_composition_none_stoichiometry_components_in_excess,Perovskite_additives_compounds,Perovskite_deposition_procedure,DMF_DMSO,Perovskite_deposition_quenching_media,HTL_stack_sequence,HTL_additives_compounds,Backcontact_stack_sequence,Backcontact_thickness_list,Perovskite_deposition_solvents_IPA,Perovskite_deposition_thermal_annealing,prob
0,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,PMA,Spin-coating,0.752442,Diethyl ether,PTAA,NPB,AgAl,95.520054,0,60; 100-5.0; 10.0,0.974578
1,nip,SLG | FTO,TiO2-c,Zn,Spin-coating,PbI2,PAA,Spin-coating,0.970659,Diethyl ether,PTAA,O2,AgAl,114.374851,0,100.0-120.0,0.970713
2,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,Stoichiometric,Cs,Spin-coating,0.746834,Diethyl ether,PTAA,NPB,Al,103.190626,0,60; 100-0.5; 5.0,0.969979
3,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,Stoichiometric,NMA,Spin-coating,0.752106,Diethyl ether,PTAA,TAPC,AgAl,89.028470,0,60; 100-3.0; 20.0,0.969584
4,pin,SLG | ITO,PCBM-60 | LiF,Undoped,Spin-coating | Evaporation,Stoichiometric,PMA,Spin-coating,0.776601,Diethyl ether,PTAA,NPB,Al,93.374074,0,60; 100-45.0; 50.0,0.969068
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
684,pin,SLG | ITO,PCBM-60 | C60 | BCP,Undoped,Spin-coating | Evaporation | Evaporation,Stoichiometric,Ni,Spin-coating,0.773320,Chlorobenzene,PTAA,TAPC,AgAl,18.695409,0,180.0-10.0,0.900361
685,pin,SLG | ITO,PCBM-60 | LiF,DIO,Spin-coating | Evaporation,Stoichiometric,ITIC; PCBM-60,Spin-coating,0.979812,Chlorobenzene,PTAA,NPB,AgAl,0.477438,0,110.0-60.0,0.900097
686,nip,SLG | FTO,TiO2-c,HCl,Spin-coating,PbI2,GuaI,Spin-coating,0.936893,Chlorobenzene,PTAA,F4-TCNQ,AgAl,539.819982,0,65.0; 100.0-1.0; 2.0,0.900092
687,nip,SLG | FTO,TiO2-c | Al2O3-mp,Undoped,ALD | Spin-coating,Pb,KSCN,Spin-coating,0.825742,Chlorobenzene,PTAA,LAD,Ag,369.358207,0,100.0-12.0,0.900054
