In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
#import forestci as fci
#from scipy.optimize import LinearConstraint, Bounds,NonlinearConstraint, minimize, BFGS
import random
import pickle
from randomforest_regression_std import *
from sklearn.externals.joblib import Parallel, delayed

In [None]:
def read_features(text_file):
    """Read a list of feature names from a text file, one name per line.

    Parameters
    ----------
    text_file : str
        Path of the text file to read.

    Returns
    -------
    list of str
        One entry per line of the file, in file order, with the line
        terminator removed. Blank lines are kept as empty strings.
    """
    # The context manager closes the file even if reading raises.
    with open(text_file, 'r') as f:
        # Strip only the newline. The previous `line[:-1]` slice also removed the
        # last real character of a final line that had no trailing newline.
        return [line.rstrip('\n') for line in f]

In [None]:
# Names of two metadata columns. NOTE(review): `meta` is not referenced again in
# the visible notebook.
meta = ['Reference DOI','Composition ID']

# --- Coercivity dataset ---
# Statement order matters here: the target is taken from the full frame BEFORE the
# frame is reduced to the kept feature columns.
coercivity_feature_file='kept_coercivity.txt'
coercivity = pd.read_csv('HC_Original_FINEMET.csv').drop(columns=['Reference DOI'])
coercivity_feature=read_features(coercivity_feature_file)
# The regression target is the natural log of the "Coercivity" column, so every
# prediction of the coercivity model is in log space.
coercivity_target=np.log(coercivity["Coercivity"])
coercivity=coercivity[coercivity_feature]


# --- Magnetic-saturation dataset ---
# Same steps as above, except that the target is used untransformed and missing
# feature values are replaced by the column means.
magneticsaturation_feature_file='kept_magnetic_saturation.txt'
magneticsaturation = pd.read_csv('MS_Original_FINEMET.csv').drop(columns=['Reference DOI'])
magneticsaturation_feature=read_features(magneticsaturation_feature_file)
magneticsaturation_target=magneticsaturation["Magnetic Saturation"]
magneticsaturation=magneticsaturation[magneticsaturation_feature]
magneticsaturation=magneticsaturation.fillna(magneticsaturation.mean())

In [None]:
def merge_features(a,b):
    """Return the sorted union of two sequences of feature names.

    Parameters
    ----------
    a, b : sequence
        Feature names (anything ``np.array`` can turn into a 1-D array).

    Returns
    -------
    numpy.ndarray
        The distinct values found in ``a`` or ``b``, sorted by ``np.unique``.
    """
    stacked = np.concatenate([np.array(a), np.array(b)], 0)
    return np.unique(stacked)

# Sorted union of the feature columns of the two property datasets. Its order
# fixes the column order used by the rest of the notebook.
mergedf=merge_features(list(coercivity.columns.values),list(magneticsaturation.columns.values))

In [None]:
def expand_data(frame, features):
    """Return ``frame`` re-expressed on the column list ``features``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source data.
    features : sequence of str
        Column labels of the result, in the desired order.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows (and index) as ``frame`` and exactly the
        columns in ``features``. Columns present in ``frame`` are copied
        unchanged; columns absent from ``frame`` are filled with 0.
    """
    # reindex keeps the row index, so absent columns are real zeros for every row.
    # The previous version started from an index-less empty frame: a scalar
    # assigned before the first real column produced a zero-row column that
    # turned into NaN (not 0) once the frame acquired its index.
    return frame.reindex(columns=list(features), fill_value=0)
'''Features now are union set of two properties, set 0 for features with no data'''
# Put both feature frames on the merged column list so that the two models are
# trained on, and later queried with, the same column layout.
coercivity=expand_data(coercivity, mergedf)
magneticsaturation=expand_data(magneticsaturation, mergedf)

In [None]:
def model_rf(feature,target):
    """Fit a random-forest regressor on ``feature`` -> ``target``.

    Parameters
    ----------
    feature : pandas.DataFrame
        Feature table; missing values are treated as 0 for training.
    target : pandas.Series
        Regression target, one value per row of ``feature``.

    Returns
    -------
    rf : RandomForestRegressor
        The fitted model. NOTE(review): the class (and its ``min_variance``
        argument) presumably comes from the star import of
        ``randomforest_regression_std`` -- confirm.
    X : numpy.ndarray
        The NaN-filled feature matrix that was used for fitting.
    """
    rf = RandomForestRegressor(n_estimators=2000, min_samples_leaf=1,min_variance=0.001)
    # fillna() returns a new frame, so the caller's DataFrame is no longer mutated.
    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
    X = feature.fillna(0).values
    Y = target.values
    # All rows weigh 1 except the first five, which weigh 2.
    # NOTE(review): the reason those five rows are emphasised is not visible here.
    sample_weight = np.ones(X.shape[0])
    sample_weight[:5] = 2
    rf.fit(X, Y, sample_weight)
    return rf, X


# Fit one model per property. The second return value is the matrix the model was
# trained on; later cells use it to derive per-feature bounds and for the plot.
coercivitymodel, coercivitydata= model_rf(coercivity, coercivity_target)
magneticsaturationmodel, magneticsaturationdata= model_rf(magneticsaturation, magneticsaturation_target)


In [None]:
'''To get predictions and standard deviation from model, run the following line'''
#preds,std=coercivitymodel.predict(coercivitydata, return_std=True)



# Plot of the magnetic-saturation model on its own training matrix: predicted
# values on x, reported values on y, with a dashed y = x reference line spanning
# the combined range of both.
preds=magneticsaturationmodel.predict(magneticsaturationdata)
axismax=max(preds.max(),magneticsaturation_target.max())
axismin=min(preds.min(),magneticsaturation_target.min())
plt.plot(preds, magneticsaturation_target,'o')
plt.plot([axismin, axismax], [axismin, axismax], 'k--')
plt.ylabel('Reported ')
plt.xlabel('Predicted ')
plt.show()

In [None]:
def identify_element(mergedf):
    """Build a 0/1 mask over feature names, marking the short ones.

    A name counts as an element when it is shorter than three characters;
    every other name is marked 0.

    Parameters
    ----------
    mergedf : iterable of str
        Feature names.

    Returns
    -------
    numpy.ndarray
        One entry per name: 1 if ``len(name) < 3`` else 0.
    """
    return np.array([1 if len(name) < 3 else 0 for name in mergedf])

In [None]:
''' composition constraints, identify elements in features and make them add up to less than 100'''
# Row vector with 1 for every feature identified as an element; dotting it with
# a candidate vector gives the summed element content.
A=identify_element(mergedf).reshape(1,-1)
lb=-1
ub=100.01
#compconstraint=LinearConstraint(A, lb, ub,keep_feasible=True)

# Per-feature minima over each training matrix.
lowerboundsc=np.min(coercivitydata,axis=0)
lowerboundssat=np.min(magneticsaturationdata,axis=0)
# Lower bound: if either dataset's minimum is exactly 0, take the larger of the
# two minima; otherwise take the smaller.
lowerbounds=np.where(
    (lowerboundsc==0) | (lowerboundssat==0),
    np.maximum(lowerboundsc,lowerboundssat),
    np.minimum(lowerboundsc,lowerboundssat),
).astype(float)

# Per-feature maxima over each training matrix.
upperboundsc=np.max(coercivitydata,axis=0)
upperboundssat=np.max(magneticsaturationdata,axis=0)
# Upper bound: the larger of the two maxima for every feature.
upperbounds=np.maximum(upperboundsc,upperboundssat).astype(float)

    

In [None]:
''' nonlinear magnetostriction constraint, constaint add on magnetostriction between -5 and 5'''
'''Bounds on all features'''
# Any feature whose upper bound is exactly 0 gets an upper bound of 100 instead
# (in-place update of the existing array).
upperbounds[upperbounds==0]=100

In [None]:
#define optimization problem

def coercivityfunction(X,coercivitymodel):
    """Predict with ``coercivitymodel`` for a single candidate vector.

    ``X`` is reshaped to one row before being passed to the model's
    ``predict`` (called with ``return_std=False``); the model's return value
    is passed through unchanged.
    """
    single_row = X.reshape(1,-1)
    return coercivitymodel.predict(single_row,return_std=False)



def optimingfunction(X,coercivitymodel,magneticsaturationmodel):
    """Penalised objective to minimise: the negated predicted magnetic saturation.

    Reads the module-level row vector ``A`` (1 for features counted as elements)
    to compute the summed element content of the candidate.

    Parameters
    ----------
    X : numpy.ndarray
        1-D candidate feature vector, ordered like the training columns.
    coercivitymodel, magneticsaturationmodel : fitted models
        Objects exposing ``predict(X, return_std=False)``.

    Returns
    -------
    float
        * ``(compsum - 100)**2 + 100`` when the element sum exceeds 100;
        * ``(coercivity - 0.5)**2 + 50 - saturation`` when the coercivity
          prediction exceeds 0.5;
        * ``-saturation`` otherwise.
    """
    # Check the cheap composition constraint first, so that infeasible candidates
    # never pay for the two forest predictions.
    compsum = float(np.ravel(np.dot(A, X.reshape(-1,1)))[0])
    if compsum>100:
        return (compsum-100)**2+100

    row = X.reshape(1,-1)
    # Reduce the one-element prediction arrays to plain floats so the optimiser
    # always receives a scalar objective value.
    coercivitypred = float(np.ravel(coercivitymodel.predict(row, return_std=False))[0])
    magneticsatpred = float(np.ravel(magneticsaturationmodel.predict(row, return_std=False))[0])

    if coercivitypred>0.5:
        return abs(coercivitypred-0.5)**2+50-magneticsatpred

    return -magneticsatpred

### First type of constraints
Include all the elements of interest

In [None]:
'''Run optimizations'''
from scipy.optimize import differential_evolution

# Result containers for this section.
diffbet=[]
results=[]
datass=[]

# One (lower, upper) pair per feature, in feature order.
bounds=[(lowerbounds[k],upperbounds[k]) for k in range(lowerbounds.shape[0])]

# X0path / ypath hold the trace of the run in progress; X0paths / ypaths collect
# one finished trace per run.
X0paths=[]
X0path=[]
ypaths=[]
ypath=[]

def callbackF(Xi,convergence=0.05):
    """Optimiser callback: record the reported vector and its objective value.

    Appends ``Xi`` to the module-level ``X0path`` and the value of
    ``optimingfunction`` at ``Xi`` to the module-level ``ypath``.
    """
    X0path.append(Xi)
    ypath.append(optimingfunction(Xi,coercivitymodel,magneticsaturationmodel))

strategies=["best1bin"]

In [None]:

# Unconstrained case: every feature keeps its full (lower, upper) range.
bb=bounds[:]
for chosestrategy in strategies:
    # Rebind the module-level trace lists before each run; callbackF appends to
    # whatever lists these names currently refer to.
    X0path=[]
    ypath=[]
    result=differential_evolution(optimingfunction,bb,args=(coercivitymodel,magneticsaturationmodel), \
                                  strategy=chosestrategy,popsize=30,mutation=(0.7,1.5),recombination=0.5, callback=callbackF, disp=1)
    # Keep a copy of this run's trace, the full result object and the best vector.
    X0paths.append(X0path[:])
    ypaths.append(ypath[:])
    results.append(result)
    datass.append(result['x'])

In [None]:
'''get prediction value after optimization '''
predslist=[]
magstriction=[]
curietemppreds=[]
magneticsaturationpreds=[]
# Evaluate both models at every optimised feature vector.
for data in datass:
    predss=coercivitymodel.predict(data.reshape(1,-1))
    predslist.append(predss[0])
    magneticsaturationpreds.append( magneticsaturationmodel.predict(data.reshape(1,-1))[0])
magneticsaturationpreds=np.array(magneticsaturationpreds)

'''Sorting the optimized prediction ascending '''
# Order every column by ascending coercivity prediction.
indexpred=np.argsort(predslist)
datass=np.array(datass)
datass2=datass[indexpred]
columnsname=coercivity.columns.values
optimizedframe=pd.DataFrame(data=datass2,columns=columnsname)
# The coercivity model was fitted on np.log of the "Coercivity" column, so the
# values stored under 'coercivity' are in log space.
optimizedframe['coercivity']=sorted(predslist)

optimizedframe['magnetic-saturation']=magneticsaturationpreds[indexpred]
# The context manager saves and closes the workbook on exit; ExcelWriter.save()
# was removed in pandas 2.0, and the sheet name is passed by keyword because the
# positional form is deprecated.
with pd.ExcelWriter('output_1.xlsx') as writer:
    optimizedframe.to_excel(writer, sheet_name='Sheet1')

### Second type of constraints
Constrain one element of the group "Ge, Mo, Nb, P" to zero

In [None]:
'''Run optimizations'''
# Result containers for this section (reset, so earlier sections' results are dropped).
diffbet=[]

results=[]
datass=[]
from scipy.optimize import differential_evolution

# One (lower, upper) pair per feature, in feature order.
bounds=[]
for i in range(lowerbounds.shape[0]):
    bb=(lowerbounds[i],upperbounds[i])
    bounds.append(bb)

# X0path / ypath hold the trace of the run in progress; X0paths / ypaths collect
# one finished trace per run.
X0paths=[]  
X0path=[]
ypaths=[]
ypath=[]
def callbackF(Xi,convergence=0.05):
    # Record the vector reported by the optimiser and its objective value in the
    # module-level trace lists.
    X0path.append(Xi)
    ypath.append(optimingfunction(Xi,coercivitymodel,magneticsaturationmodel))
    return 
strategies=["best1bin"]

# NOTE(review): `combos` is not used below; the loop hard-codes the same indices
# via range(5,9). Presumably 5..8 are the positions of Ge, Mo, Nb and P in the
# sorted feature list -- TODO confirm against mergedf.
combos=[5,6,7,8];
for jj in range(5,9):#[1]
            # Pin feature jj to zero; all other features keep their range.
            bb=bounds[:]
            bb[jj]=(0,0)
            for chosestrategy in strategies:
                # Rebind the module-level trace lists that callbackF appends to.
                X0path=[]
                ypath=[]
                result=differential_evolution(optimingfunction,bb,args=(coercivitymodel,magneticsaturationmodel), \
                                              strategy=chosestrategy,popsize=30,mutation=(0.7,1.5),recombination=0.5, callback=callbackF, disp=1)
                X0paths.append(X0path[:])
                ypaths.append(ypath[:])
                results.append(result)
                datass.append(result['x'])

In [None]:
'''get prediction value after optimization '''
predslist=[]
magstriction=[]
curietemppreds=[]
magneticsaturationpreds=[]
# Evaluate both models at every optimised feature vector.
for data in datass:
    predss=coercivitymodel.predict(data.reshape(1,-1))
    predslist.append(predss[0])
    magneticsaturationpreds.append( magneticsaturationmodel.predict(data.reshape(1,-1))[0])
magneticsaturationpreds=np.array(magneticsaturationpreds)

'''Sorting the optimized prediction ascending '''
# Order every column by ascending coercivity prediction.
indexpred=np.argsort(predslist)
datass=np.array(datass)
datass2=datass[indexpred]
columnsname=coercivity.columns.values
optimizedframe=pd.DataFrame(data=datass2,columns=columnsname)
# The coercivity model was fitted on np.log of the "Coercivity" column, so the
# values stored under 'coercivity' are in log space.
optimizedframe['coercivity']=sorted(predslist)

optimizedframe['magnetic-saturation']=magneticsaturationpreds[indexpred]
# The context manager saves and closes the workbook on exit; ExcelWriter.save()
# was removed in pandas 2.0, and the sheet name is passed by keyword because the
# positional form is deprecated.
with pd.ExcelWriter('output_2.xlsx') as writer:
    optimizedframe.to_excel(writer, sheet_name='Sheet1')

### Third type of constraints
Constrain two elements of the group "Ge, Mo, Nb, P" to zero

In [None]:
'''Run optimizations'''
from itertools import combinations
from scipy.optimize import differential_evolution

# Result containers for this section (reset, so earlier sections' results are dropped).
diffbet=[]

results=[]
datass=[]

# One (lower, upper) pair per feature, in feature order.
bounds=[]
for i in range(lowerbounds.shape[0]):
    bb=(lowerbounds[i],upperbounds[i])
    bounds.append(bb)

# X0path / ypath hold the trace of the run in progress; X0paths / ypaths collect
# one finished trace per run.
X0paths=[]
X0path=[]
ypaths=[]
ypath=[]
def callbackF(Xi,convergence=0.05):
    # Record the vector reported by the optimiser and its objective value in the
    # module-level trace lists.
    X0path.append(Xi)
    ypath.append(optimingfunction(Xi,coercivitymodel,magneticsaturationmodel))
    return
strategies=["best1bin"]

# NOTE(review): presumably 5..8 are the positions of Ge, Mo, Nb and P in the
# sorted feature list -- TODO confirm against mergedf.
combos=[5,6,7,8]
# Every unordered pair of DISTINCT indices (6 runs). The previous nested
# range(jj,9) loops also produced ii == jj, i.e. four extra runs in which only a
# single element was pinned to zero.
for jj, ii in combinations(combos, 2):
    bb=bounds[:]
    bb[jj]=(0,0)
    bb[ii]=(0,0)
    for chosestrategy in strategies:
        # Rebind the module-level trace lists that callbackF appends to.
        X0path=[]
        ypath=[]
        result=differential_evolution(optimingfunction,bb,args=(coercivitymodel,magneticsaturationmodel),
                                      strategy=chosestrategy,popsize=30,mutation=(0.7,1.5),recombination=0.5, callback=callbackF, disp=1)
        X0paths.append(X0path[:])
        ypaths.append(ypath[:])
        results.append(result)
        datass.append(result['x'])

In [None]:
'''get prediction value after optimization '''
predslist=[]
magstriction=[]
curietemppreds=[]
magneticsaturationpreds=[]
# Evaluate both models at every optimised feature vector.
for data in datass:
    predss=coercivitymodel.predict(data.reshape(1,-1))
    predslist.append(predss[0])
    magneticsaturationpreds.append( magneticsaturationmodel.predict(data.reshape(1,-1))[0])
magneticsaturationpreds=np.array(magneticsaturationpreds)

'''Sorting the optimized prediction ascending '''
# Order every column by ascending coercivity prediction.
indexpred=np.argsort(predslist)
datass=np.array(datass)
datass2=datass[indexpred]
columnsname=coercivity.columns.values
optimizedframe=pd.DataFrame(data=datass2,columns=columnsname)
# The coercivity model was fitted on np.log of the "Coercivity" column, so the
# values stored under 'coercivity' are in log space.
optimizedframe['coercivity']=sorted(predslist)

optimizedframe['magnetic-saturation']=magneticsaturationpreds[indexpred]
# The context manager saves and closes the workbook on exit; ExcelWriter.save()
# was removed in pandas 2.0, and the sheet name is passed by keyword because the
# positional form is deprecated.
with pd.ExcelWriter('output_3.xlsx') as writer:
    optimizedframe.to_excel(writer, sheet_name='Sheet1')

### Fourth type of constraints
Constrain three elements of the group "Ge, Mo, Nb, P" to zero

In [None]:
from itertools import combinations
from scipy.optimize import differential_evolution

# Result containers for this section (reset, so earlier sections' results are dropped).
diffbet=[]

results=[]
datass=[]

# One (lower, upper) pair per feature, in feature order.
bounds=[]
for i in range(lowerbounds.shape[0]):
    bb=(lowerbounds[i],upperbounds[i])
    bounds.append(bb)

# X0path / ypath hold the trace of the run in progress; X0paths / ypaths collect
# one finished trace per run.
X0paths=[]
X0path=[]
ypaths=[]
ypath=[]
def callbackF(Xi,convergence=0.05):
    # Record the vector reported by the optimiser and its objective value in the
    # module-level trace lists.
    X0path.append(Xi)
    ypath.append(optimingfunction(Xi,coercivitymodel,magneticsaturationmodel))
    return
strategies=["best1bin"]

# NOTE(review): presumably 5..8 are the positions of Ge, Mo, Nb and P in the
# sorted feature list -- TODO confirm against mergedf.
combos=[5,6,7,8]
# Every unordered triple of DISTINCT indices (4 runs). The previous nested
# range(jj,9) / range(ii,9) loops allowed repeated indices, giving 20 runs of
# which 16 pinned only one or two elements to zero.
for jj, ii, iii in combinations(combos, 3):
    bb=bounds[:]
    bb[jj]=(0,0)
    bb[ii]=(0,0)
    bb[iii]=(0,0)
    for chosestrategy in strategies:
        # Rebind the module-level trace lists that callbackF appends to.
        X0path=[]
        ypath=[]
        result=differential_evolution(optimingfunction,bb,args=(coercivitymodel,magneticsaturationmodel),
                                      strategy=chosestrategy,popsize=30,mutation=(0.7,1.5),recombination=0.5, callback=callbackF, disp=1)
        X0paths.append(X0path[:])
        ypaths.append(ypath[:])
        results.append(result)
        datass.append(result['x'])


In [None]:
'''get prediction value after optimization '''
predslist=[]
magstriction=[]
curietemppreds=[]
magneticsaturationpreds=[]
# Evaluate both models at every optimised feature vector.
for data in datass:
    predss=coercivitymodel.predict(data.reshape(1,-1))
    predslist.append(predss[0])
    magneticsaturationpreds.append( magneticsaturationmodel.predict(data.reshape(1,-1))[0])
magneticsaturationpreds=np.array(magneticsaturationpreds)

'''Sorting the optimized prediction ascending '''
# Order every column by ascending coercivity prediction.
indexpred=np.argsort(predslist)
datass=np.array(datass)
datass2=datass[indexpred]
columnsname=coercivity.columns.values
optimizedframe=pd.DataFrame(data=datass2,columns=columnsname)
# The coercivity model was fitted on np.log of the "Coercivity" column, so the
# values stored under 'coercivity' are in log space.
optimizedframe['coercivity']=sorted(predslist)

optimizedframe['magnetic-saturation']=magneticsaturationpreds[indexpred]
# The context manager saves and closes the workbook on exit; ExcelWriter.save()
# was removed in pandas 2.0, and the sheet name is passed by keyword because the
# positional form is deprecated.
with pd.ExcelWriter('output_4.xlsx') as writer:
    optimizedframe.to_excel(writer, sheet_name='Sheet1')

### Combine results together

In [None]:
import glob
import pandas as pd

# Collect the per-section workbooks. The list is sorted because glob returns
# matches in arbitrary order, which would make the combined row order vary
# between machines. (`recursive=True` was dropped: it has no effect on a
# pattern without '**'.)
files = sorted(glob.glob('./output_*.xlsx'))

# index_col=0 consumes the index column that to_excel wrote into each workbook;
# without it that column comes back as a stray 'Unnamed: 0' data column.
frames = [pd.read_excel(file, index_col=0) for file in files]

# Concatenate once instead of growing a frame inside the loop; fall back to an
# empty frame when no workbook was found (pd.concat rejects an empty list).
frame = pd.concat(frames).reset_index(drop=True) if frames else pd.DataFrame()

frame.to_excel('together-output.xlsx')