In [35]:
import seaborn as sns
import time

data = sns.load_dataset('iris')

def compute_class(petal_length):
    """Bucket a petal length into one of three integer classes.

    Returns 1 when ``petal_length`` is at most 2, 2 when it lies strictly
    between 2 and 5, and 3 otherwise. NaN ends up in class 3 because every
    comparison against NaN is false.
    """
    if petal_length <= 2:
        return 1
    # Past the first guard the value is either > 2 or NaN, so only the
    # upper bound still needs checking.
    if petal_length < 5:
        return 2
    return 3

# Benchmark 1: classify every row with a positional index loop.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can be too coarse to
# separate millisecond-scale runs.
start = time.perf_counter()

class_list = []
for i in range(len(data)):
    # Row-by-row positional lookup is intentionally kept: it is the slow
    # baseline that the later cells are compared against.
    petal_length = data.iloc[i]['petal_length']
    class_num = compute_class(petal_length)
    class_list.append(class_num)

end = time.perf_counter()
print("For-loop run time = {}".format(end - start))

For-loop run time = 0.01698756217956543


In [12]:
# Benchmark 2: classify every row while iterating with DataFrame.iterrows().
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can be too coarse to
# separate millisecond-scale runs.
start = time.perf_counter()

class_list = []
# The row label is not needed, only the row contents.
for _index, data_row in data.iterrows():
    petal_length = data_row['petal_length']
    class_num = compute_class(petal_length)
    class_list.append(class_num)

end = time.perf_counter()
print("Iterrows run time = {}".format(end - start))

Iterrows run time = 0.00797891616821289


In [13]:
# Benchmark 3: classify every row with a row-wise DataFrame.apply().
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can be too coarse to
# separate millisecond-scale runs.
start = time.perf_counter()

# axis=1 hands each row to the lambda; the result is a Series of class ids.
class_list = data.apply(lambda row: compute_class(row['petal_length']), axis=1)

end = time.perf_counter()
print(".apply() run time = {}".format(end - start))

.apply() run time = 0.0019948482513427734


In [14]:
import numpy as np
import pandas as pd

# Benchmark 4: vectorised binning with pandas.cut().
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can be too coarse to
# separate millisecond-scale runs.
start = time.perf_counter()

# pd.cut bins are closed on the right, so an edge at exactly 5 would put a
# petal length of 5 into class 2, whereas compute_class(5) returns 3.
# Using the largest float below 5 as the edge makes the bins
# [0, 2], (2, 5) and [5, 100], matching compute_class on that range.
upper_edge_of_class_2 = np.nextafter(5, 0)
class_list = pd.cut(x=data.petal_length,
                   bins=[0, 2, upper_edge_of_class_2, 100],
                   include_lowest=True,
                   labels=[1, 2, 3]).astype(int)

end = time.perf_counter()
print(".cut() run time = {}".format(end - start))

.cut() run time = 0.001993894577026367


In [76]:
# Preview the first five rows of the frame.
data.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,2
1,4.9,3.0,1.4,0.2,2
2,4.7,3.2,1.3,0.2,2
3,4.6,3.1,1.5,0.2,2
4,5.0,3.6,1.4,0.2,2


In [17]:
# Count how many rows each species value has; the index of the result is
# what the encoding cell below iterates over.
fre = data.species.value_counts()

In [36]:
# Encode each species value as an integer code 1..k, numbered in the order
# the values appear in `fre.index` (same numbering as before).
species_codes = {name: code for code, name in enumerate(fre.index, start=1)}

# Only the `species` column is touched: a frame-wide replace() would also
# rewrite any matching value in the feature columns. Values that are not a
# key of `species_codes` are passed through unchanged, so re-running this
# cell on already-encoded data is a no-op. The explicit cast makes the label
# dtype independent of pandas' implicit downcasting rules.
data = data.assign(
    species=data['species']
    .map(lambda value: species_codes.get(value, value))
    .astype('int64')
)

In [75]:
# Preview the first five rows to check the encoded `species` column.
data.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,2
1,4.9,3.0,1.4,0.2,2
2,4.7,3.2,1.3,0.2,2
3,4.6,3.1,1.5,0.2,2
4,5.0,3.6,1.4,0.2,2


In [119]:
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.svm import SVC
from sklearn.svm import NuSVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
import numpy as np
# Shuffled, stratified 5-fold splitter with a fixed seed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Target is the `species` column; every other column is a feature.
trainy = data['species']
trainx = data.drop('species', axis=1)

# One out-of-fold prediction buffer per model, each a separate zero array
# with one slot per sample.
n_samples = len(trainy)
SVCclfoof, LogisticRegressionclfoof, MLPoof, NuSVCoof, gnboof = (
    np.zeros(n_samples) for _ in range(5)
)

In [120]:
# Out-of-fold evaluation: for every split, fit fresh models on the training
# part and store their predictions for the held-out part in the matching
# *oof buffer.
for train_index, test_index in skf.split(trainx, trainy):
    fold_train_x = trainx.iloc[train_index]
    fold_train_y = trainy.iloc[train_index]
    fold_test_x = trainx.iloc[test_index]

    # New estimators each fold so nothing carries over between splits.
    SVCclf = SVC(gamma='auto',probability=True)
    LogisticRegressionclf = LogisticRegression(random_state=0, solver='lbfgs',multi_class='multinomial')
    MLP = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 4), random_state=1)
    NuSVCclf = NuSVC(nu=0.15, kernel='rbf',gamma='scale')
    gnb = GaussianNB()

    # Each model is paired with its own prediction buffer; models are fitted
    # in the same order as they were created.
    model_buffers = (
        (SVCclf, SVCclfoof),
        (LogisticRegressionclf, LogisticRegressionclfoof),
        (MLP, MLPoof),
        (NuSVCclf, NuSVCoof),
        (gnb, gnboof),
    )
    for model, oof in model_buffers:
        model.fit(fold_train_x, fold_train_y)
        oof[test_index] = model.predict(fold_test_x)



In [117]:
from sklearn.metrics import f1_score
# Macro-averaged F1 of the SVC out-of-fold predictions.
f1_score(y_true=trainy, y_pred=SVCclfoof, average='macro')

0.9866666666666667

In [116]:
# Macro-averaged F1 of the logistic-regression out-of-fold predictions.
f1_score(y_true=trainy, y_pred=LogisticRegressionclfoof, average='macro')

0.9666633329999667

In [115]:
# Macro-averaged F1 of the MLP out-of-fold predictions.
f1_score(y_true=trainy, y_pred=MLPoof, average='macro')

0.9733226623982927

In [114]:
# Macro-averaged F1 of the NuSVC out-of-fold predictions.
f1_score(y_true=trainy, y_pred=NuSVCoof, average='macro')

0.97999799979998

In [121]:
# Macro-averaged F1 of the Gaussian naive Bayes out-of-fold predictions.
f1_score(y_true=trainy, y_pred=gnboof, average='macro')

0.9533286661999534

Keras: neural-network baseline evaluated with 5-fold cross-validation

In [122]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [124]:
# Seed NumPy's global random generator; the same value is reused as the
# random_state of the KFold splitter further down.
seed = 7
np.random.seed(seed=seed)

In [133]:
def baseline_model():
    """Build and compile the small feed-forward classifier used as baseline.

    Architecture: 4 inputs -> Dense(5, relu) -> Dense(3, relu) ->
    Dense(3, softmax). The model is compiled with categorical cross-entropy
    loss, the Adam optimizer and accuracy as the reported metric.

    Returns:
        A compiled, untrained ``Sequential`` model.
    """
    model = Sequential()
    # Only the first layer declares the input width; later layers take their
    # input size from the layer before them. The second layer previously
    # passed input_dim=4 although it is fed by the 5-unit layer above.
    model.add(Dense(5, input_dim=4, activation='relu'))
    model.add(Dense(3, activation='relu'))
    # One softmax output per class.
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [134]:
# Wrap the Keras model factory so it can be used as a scikit-learn estimator;
# the training settings are forwarded as keyword arguments.
fit_settings = dict(epochs=200, batch_size=5, verbose=0)
estimator = KerasClassifier(build_fn=baseline_model, **fit_settings)

In [135]:
# Shuffled 5-fold cross-validation of the Keras estimator, seeded with `seed`.
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
results = cross_val_score(estimator, trainx, trainy, cv=kfold)

# Report mean and standard deviation of the fold scores as percentages.
mean_pct = results.mean()*100
std_pct = results.std()*100
print("Baseline: %.2f%% (%.2f%%)" % (mean_pct, std_pct))

Baseline: 96.00% (3.89%)
