## APPROACH: 

## Assuming only CO2 gas, we are selecting a set of salts, and ONLY single salt


## Once we have that, the features we are using are: ion concentrations, ion charge, and ion energy. 

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from tqdm import tqdm

In [2]:
Kg={"He":-0.222,"H2":-0.2144,"Ne":-0.224,"N2":-0.1904,"Ar":-0.1866,"O2":-0.1737,"Kr":-0.1762,"C2H4":-0.1977,
        "N2O":-0.2144,"C2H2":-0.2182,"CO2":-0.2377,"H2S":-0.2551,"NH3":-0.2394,"SO2":-0.3154,"CH4":-0.2168,
        "DME":-0.1972,"Cl":0.3416,"Br":0.3310,"I":0.3124,"NO3":0.3230,"OH":0.3875,"CNS":0.2612,"HS":0.3718,
        "HCO3":0.4286,"SO4":0.3446,"CO3":0.3754,"SO3":0.3275,"PO4":0.3265,"Li":-0.0416,"Na":-0.0183,"K":-0.0362,
        "Cs":-0.0584,"NH4":-0.0737,"H":-0.1110,"Rb":-0.0449,"Mg":-0.0568,"Ca":-0.0547,"Ba":-0.0473,"Zn":-0.0590,
        "Sr":-0.0445,"Fe":-0.0602,"Co":-0.0534,"Ni":-0.0520,"Mn":-0.0625,"Al":-0.726}

anioncharge={"Cl":1,"Br":1,"I":1,"NO3":1,"OH":1,"CNS":1,"HS":1,"HCO3":1,"SO4":2,"CO3":2,"SO3":2,"PO4":3}

cationcharge={"Li":1,"Na":1,"K":1,"Cs":1,"NH4":1,"H":1,"Rb":1,"Mg":2,"Ca":2,"Ba":2,"Zn":2,"Sr":2,"Fe":2,"Co":2,
         "Ni":2,"Mn":2,"Al":3}

gas_tb={"He":4.15,"H2":20.15,"Ne":24.45,"N2":77.15,"Ar":87.15,"O2":90.15,"Kr":119.93,"C2H4":169.45,
        "N2O":184.67,"C2H2":189.15,"CO2":194.65,"H2S":212.87,"NH3":237.65,"SO2":263.15,"CH4":111.65,
        "DME":249.00,"C2H6":184.6, "C3H8":231.1, "C4H10":272.7}

gas_tc={"He":5.19,"H2":33.2,"Ne":44.4,"N2":126.2,"Ar":150.8,"O2":154.6,"Kr":209.3,"C2H4":282.4,
        "N2O":309.5,"C2H2":309.2,"CO2":304.19,"H2S":373.65,"NH3":405.5,"SO2":430.8,"CH4":190.6,
        "DME":400.1, "C2H6":305.4, "C3H8":369.8, "C4H10":425.2}

gas_pc={"H2":12.97,"N2":33.95,"CO2":73.78,"H2S":89.37,"CH4":46.01,"DME":52.37,"C2H6":48.85,"C3H8":42.47,
        "C4H10":38.01, "He":2.27, "Ne":27.6,"Ar":48.7,"O2":50.5,"Kr":55,"C2H4":50.41,"N2O":72.4,"C2H2":62.45,
        "NH3":112.8,"SO2":78.84}

gas_mol_wt={"H2":2.02,"N2":28.01,"CO2":44.01,"H2S":34.08,"CH4":16.04,
        "DME":46.07,"C2H6":30.07,"C3H8":44.1,"C4H10":58.12}

acc_factor={"He":0,"H2":-0.22,"Ne":0,"N2":0.04,"Ar":0,"O2":0.022,"Kr":0,"C2H4":0.089,
        "N2O":0.142,"C2H2":0.187,"CO2":0.225,"H2S":0.1,"NH3":0.253,"SO2":0.251,"CH4":0.008,
        "DME":0.189, "C2H6":0.098, "C3H8":0.152, "C4H10":0.193}

cation_hydration_energy={"Li":475,"Na":365,"K":295,"Cs":250,"NH4":285,"H":1050,"Rb":275,"Mg":1830,"Ca":1505,
                         "Ba":1250,"Zn":1955,"Sr":1380,"Fe":1840,"Co":1915,"Ni":1980,"Mn":1760,
                         "Al":4525}

anion_hydration_energy={"Cl":340,"Br":315,"I":275,"NO3":300,"OH":430,"CNS":280,"HS":295,"HCO3":335,"SO4":1080,
                        "CO3":1315,"SO3":1295,"PO4":2765}

In [3]:
## input
salts=["NaCl","NaHCO3","CaCl2","CaCO3","CaSO4","MgCl2","MgCO3","KCl","MgSO4"]
gas="Ar"

In [4]:
#%%time

concentration_values=[]

low=0
delta=0.2
high=2+delta

concentrations=list(np.arange(low,high,delta))

for i1 in concentrations:
    for i2 in list(np.arange(low,high-i1,delta)):
        for i3 in list(np.arange(low,high-i1-i2,delta)):
            for i4 in list(np.arange(low,high-i1-i2-i3,delta)):
                for i5 in list(np.arange(low,high-i1-i2-i3-i4,delta)):
                    for i6 in list(np.arange(low,high-i1-i2-i3-i4-i5,delta)):
                        for i7 in list(np.arange(low,high-i1-i2-i3-i4-i5-i6,delta)):
                            for i8 in list(np.arange(low,high-i1-i2-i3-i4-i5-i6-i7,delta)):
                                for i9 in list(np.arange(low,high-i1-i2-i3-i4-i5-i6-i7-i8,delta)):
                                    concentration_values.append([i1,i2,i3,i4,i5,i6,i7,i8,i9])
                                    

a=len(concentration_values) # molarity conc
a

CPU times: user 694 ms, sys: 155 ms, total: 849 ms
Wall time: 722 ms


133993

In [5]:
saltss=len(concentration_values)*[salts]
salts_conc=list(zip(saltss,concentration_values))
salts_conc=[list(zip(each[0],each[1])) for each in salts_conc]
salts_conc[0]

[('NaCl', 0.0),
 ('NaHCO3', 0.0),
 ('CaCl2', 0.0),
 ('CaCO3', 0.0),
 ('CaSO4', 0.0),
 ('MgCl2', 0.0),
 ('MgCO3', 0.0),
 ('KCl', 0.0),
 ('MgSO4', 0.0)]

In [6]:
def splitsalt(salt):
    cation,catfactor,anion,anionfactor,i="",0,"",0,0

    if salt[:1] in cationcharge:
        cation=salt[:1]
        try:
            catfactor,i=int(salt[1]),2
        except:
            catfactor,i=1,1
    elif salt[:2] in cationcharge:
        cation=salt[:2]
        try:
            catfactor,i=int(salt[2]),3
        except:
            catfactor,i=1,2

    if salt[i:] in anioncharge:
        anion=salt[i:]
        anionfactor=1
    else:
        anion=salt[i:-1]
        anionfactor=int(salt[-1])
    return cation,catfactor,anion,anionfactor

In [7]:
cations=set()
anions=set()
for salt in salts:
    splits=splitsalt(salt)
    cation,anion=splits[0],splits[2]
    cations.add(cation)
    anions.add(anion)
cations=list(cations)
anions=list(anions)

In [8]:
cations=sorted(cations)
cations

['Ca', 'K', 'Mg', 'Na']

In [9]:
anions=sorted(anions)
anions

['CO3', 'Cl', 'HCO3', 'SO4']

In [10]:
#%%time
def createdata(salts,concentrations):
    features=[]
    #features.extend(salts)
    salts_conc=[each+"_concentration" for each in salts]
    features.extend(salts_conc)
    df=pd.DataFrame(data=concentrations,columns=features)
    return df

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 6.91 µs


In [11]:
#%%time
df=createdata(salts,concentration_values)

CPU times: user 140 ms, sys: 12 ms, total: 153 ms
Wall time: 152 ms


In [12]:
## add ions charge and energy

In [13]:
#%%time
charges_dict_list=[]
energy_dict_list=[]

for each in tqdm(salts_conc):
    charges={}
    energy={}
    for each2 in each:
        splits=splitsalt(each2[0])
        cation,anion=splits[0],splits[2]
        if each2[1]!=0:
            charges[cation]=cationcharge[cation]
            charges[anion]=anioncharge[anion]
            energy[cation]=cation_hydration_energy[cation]
            energy[anion]=anion_hydration_energy[anion]
        else:
            if cation not in charges:
                charges[cation]=0
            if anion not in charges:
                charges[anion]=0
            if cation not in energy:
                energy[cation]=0
            if anion not in energy:
                energy[anion]=0
    charges_dict_list.append(charges)
    energy_dict_list.append(energy)

100%|██████████| 133993/133993 [00:04<00:00, 26992.35it/s]

CPU times: user 4.75 s, sys: 210 ms, total: 4.96 s
Wall time: 4.98 s





In [14]:
charges_dict_list[0]

{'Na': 0, 'Cl': 0, 'HCO3': 0, 'Ca': 0, 'CO3': 0, 'SO4': 0, 'Mg': 0, 'K': 0}

In [15]:
energy_dict_list[0]

{'Na': 0, 'Cl': 0, 'HCO3': 0, 'Ca': 0, 'CO3': 0, 'SO4': 0, 'Mg': 0, 'K': 0}

In [16]:
#%%time
df["charges_dictionary"]=charges_dict_list
df["energy_dictionary"]=energy_dict_list

CPU times: user 29.9 ms, sys: 4.15 ms, total: 34 ms
Wall time: 32.3 ms


In [17]:
ions=list(df["charges_dictionary"].loc[0].keys())
charge_features=[ion+"_charge" for ion in ions]
energy_features=[ion+"_energy" for ion in ions]

for each in charge_features:
    df[each]=np.nan
for each in energy_features:
    df[each]=np.nan

In [18]:
#%%time
tqdm.pandas()

for each in charge_features:
    df[each]=df["charges_dictionary"].progress_apply(lambda x:x[each.split("_")[0]])
    
for each in energy_features:
    df[each]=df["energy_dictionary"].progress_apply(lambda x:x[each.split("_")[0]])

  from pandas import Panel
100%|██████████| 133993/133993 [00:00<00:00, 495443.11it/s]
100%|██████████| 133993/133993 [00:00<00:00, 504425.22it/s]
100%|██████████| 133993/133993 [00:00<00:00, 528415.97it/s]
100%|██████████| 133993/133993 [00:00<00:00, 527367.26it/s]
100%|██████████| 133993/133993 [00:00<00:00, 535183.96it/s]
100%|██████████| 133993/133993 [00:00<00:00, 532910.97it/s]
100%|██████████| 133993/133993 [00:00<00:00, 518197.41it/s]
100%|██████████| 133993/133993 [00:00<00:00, 554581.53it/s]
100%|██████████| 133993/133993 [00:00<00:00, 536521.53it/s]
100%|██████████| 133993/133993 [00:00<00:00, 531246.10it/s]
100%|██████████| 133993/133993 [00:00<00:00, 527447.93it/s]
100%|██████████| 133993/133993 [00:00<00:00, 513003.72it/s]
100%|██████████| 133993/133993 [00:00<00:00, 532824.07it/s]
100%|██████████| 133993/133993 [00:00<00:00, 525011.89it/s]
100%|██████████| 133993/133993 [00:00<00:00, 517731.02it/s]
100%|██████████| 133993/133993 [00:00<00:00, 554003.68it/s]

CPU times: user 4.07 s, sys: 65.9 ms, total: 4.13 s
Wall time: 4.12 s





In [19]:
## add ion concentrations

In [20]:
#%%time
ions_conc=[]

for each in tqdm(salts_conc):
    conc={}
    for each2 in each:
        cation,catval,anion,anionval=splitsalt(each2[0])
        if cation not in conc:
            conc[cation]=each2[1]*catval
        else:
            conc[cation]+=each2[1]*catval
            
        if anion not in conc:
            conc[anion]=each2[1]*anionval
        else:
            conc[anion]+=each2[1]*anionval
    ions_conc.append(conc)

100%|██████████| 133993/133993 [00:05<00:00, 24620.19it/s]

CPU times: user 5.27 s, sys: 152 ms, total: 5.42 s
Wall time: 5.45 s





In [21]:
#%%time
df["ions_conc_dic"]=ions_conc

CPU times: user 17.3 ms, sys: 883 µs, total: 18.1 ms
Wall time: 18.2 ms


In [22]:
ion_features=[ion+"_concentration" for ion in ions]

for each in ion_features:
    df[each]=np.nan

In [23]:
for each in ion_features:
    df[each]=df["ions_conc_dic"].progress_apply(lambda x:x[each.split("_")[0]])

100%|██████████| 133993/133993 [00:00<00:00, 405067.29it/s]
100%|██████████| 133993/133993 [00:00<00:00, 532881.66it/s]
100%|██████████| 133993/133993 [00:00<00:00, 568132.99it/s]
100%|██████████| 133993/133993 [00:00<00:00, 509554.82it/s]
100%|██████████| 133993/133993 [00:00<00:00, 575043.03it/s]
100%|██████████| 133993/133993 [00:00<00:00, 560101.35it/s]
100%|██████████| 133993/133993 [00:00<00:00, 513520.75it/s]
100%|██████████| 133993/133993 [00:00<00:00, 544404.07it/s]


In [24]:
def calcis(zc,cc,za,ca,M):
    out=(((zc*zc)*cc)+((za*za)*ca))/2
    return out*M

In [25]:
## calc rs

In [26]:
#%%time
rs_list=[]
for each in tqdm(salts_conc):
    val=0
    #print(each)
    for each2 in each:
        cat,catval,ani,anival=splitsalt(each2[0])
        isstr=calcis(cationcharge[cat],catval,anioncharge[ani],anival,each2[1])
        kg=Kg[gas]+Kg[cat]+Kg[ani]
        val+=isstr*kg
    #print(val)
    rs_list.append(val)
    #print("########################")

100%|██████████| 133993/133993 [00:05<00:00, 25799.25it/s]

CPU times: user 5.11 s, sys: 78 ms, total: 5.19 s
Wall time: 5.2 s





In [27]:
#%%time
df["rs"]=rs_list

CPU times: user 17.8 ms, sys: 1.34 ms, total: 19.2 ms
Wall time: 18.8 ms


In [28]:
df['gas'] = gas

In [29]:
df['gas_tc'] = df["gas"].progress_apply(lambda x:gas_tc[x])
df['gas_pc'] = df["gas"].progress_apply(lambda x:gas_pc[x])
df['acc_factor'] = df["gas"].progress_apply(lambda x:acc_factor[x])

100%|██████████| 133993/133993 [00:00<00:00, 812872.99it/s]
100%|██████████| 133993/133993 [00:00<00:00, 807736.02it/s]
100%|██████████| 133993/133993 [00:00<00:00, 728531.45it/s]


In [30]:
df.columns

Index(['NaCl_concentration', 'NaHCO3_concentration', 'CaCl2_concentration',
       'CaCO3_concentration', 'CaSO4_concentration', 'MgCl2_concentration',
       'MgCO3_concentration', 'KCl_concentration', 'MgSO4_concentration',
       'charges_dictionary', 'energy_dictionary', 'Na_charge', 'Cl_charge',
       'HCO3_charge', 'Ca_charge', 'CO3_charge', 'SO4_charge', 'Mg_charge',
       'K_charge', 'Na_energy', 'Cl_energy', 'HCO3_energy', 'Ca_energy',
       'CO3_energy', 'SO4_energy', 'Mg_energy', 'K_energy', 'ions_conc_dic',
       'Na_concentration', 'Cl_concentration', 'HCO3_concentration',
       'Ca_concentration', 'CO3_concentration', 'SO4_concentration',
       'Mg_concentration', 'K_concentration', 'rs', 'gas', 'gas_tc', 'gas_pc',
       'acc_factor'],
      dtype='object')

In [31]:
df.head(2)

Unnamed: 0,NaCl_concentration,NaHCO3_concentration,CaCl2_concentration,CaCO3_concentration,CaSO4_concentration,MgCl2_concentration,MgCO3_concentration,KCl_concentration,MgSO4_concentration,charges_dictionary,...,Ca_concentration,CO3_concentration,SO4_concentration,Mg_concentration,K_concentration,rs,gas,gas_tc,gas_pc,acc_factor
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'Na': 0, 'Cl': 0, 'HCO3': 0, 'Ca': 0, 'CO3': ...",...,0.0,0.0,0.0,0.0,0.0,0.0,Ar,150.8,48.7,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,"{'Na': 0, 'Cl': 0, 'HCO3': 0, 'Ca': 0, 'CO3': ...",...,0.0,0.0,0.2,0.2,0.0,0.08096,Ar,150.8,48.7,0


In [32]:
file_name = gas+'_data.csv'
df.to_csv(file_name)

In [33]:
features0=['Na_concentration', 'Cl_concentration', 'HCO3_concentration',
            'Ca_concentration', 'CO3_concentration', 'SO4_concentration','Mg_concentration', 
            'K_concentration']

features1=['Na_energy', 'Cl_energy', 'HCO3_energy', 'Ca_energy','CO3_energy', 'SO4_energy', 
            'Mg_energy', 'K_energy','Na_concentration', 'Cl_concentration', 'HCO3_concentration',
            'Ca_concentration', 'CO3_concentration', 'SO4_concentration','Mg_concentration', 
            'K_concentration']

features2=['Na_charge', 'Cl_charge', 'HCO3_charge', 'Ca_charge', 'CO3_charge', 'SO4_charge', 'Mg_charge',
            'K_charge','Na_concentration', 'Cl_concentration', 'HCO3_concentration',
            'Ca_concentration', 'CO3_concentration', 'SO4_concentration','Mg_concentration', 
            'K_concentration']

features3=['Na_charge', 'Cl_charge', 'HCO3_charge', 'Ca_charge', 'CO3_charge', 'SO4_charge', 'Mg_charge',
            'K_charge', 'Na_energy', 'Cl_energy', 'HCO3_energy', 'Ca_energy','CO3_energy', 'SO4_energy', 
            'Mg_energy', 'K_energy','Na_concentration', 'Cl_concentration', 'HCO3_concentration',
            'Ca_concentration', 'CO3_concentration', 'SO4_concentration','Mg_concentration', 
            'K_concentration']

In [34]:
## save as csv