# MACHINE LEARNING WITH PYTHON COOKBOOK

In [34]:
# ---------------------------------- Chapter 1: Vectors, Matrices, Arrays ----------------------------------
import numpy as np
from scipy import sparse

# Create a vector as a row and as a column
vector_row = np.array([1, 2, 3])
vector_column = np.array([[1], [2], [3]])

# Create a 3x2 matrix
matrix = np.array([[1, 2],
                   [1, 2],
                   [1, 2]])

# Create a sparse matrix (CSR keeps only the non-zero entries)
matrix_sparse = sparse.csr_matrix(np.array([[0, 0, 1], [0, 1, 0], [5, 3, 0]]))

# -----------------------------------------------------------------------------------------------------------
# Apply operations to elements
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
vectorized_function = np.vectorize(lambda x: x + 100)
vectorized_function(matrix)

# Finding max & min (axis=0: per column, axis=1: per row)
np.max(matrix, axis=0)
np.min(matrix, axis=1)

# Calculate average, variance, standard deviation (all accept an axis option)
np.mean(matrix)
np.var(matrix)
np.std(matrix)

# Reshape arrays (-1 means "as many as needed"); reshape(1, -1) gives a single row instead
matrix.reshape(-1, 1)

# Transposing a matrix
matrix.T

# Flattening a matrix (1-dimensional array)
matrix.flatten()

# Finding the rank of a matrix
np.linalg.matrix_rank(matrix)

# Calculate the determinant
np.linalg.det(matrix)

# Getting the diagonal of a matrix
matrix.diagonal(offset=0)

# Calculate eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(matrix)

# Inverting a matrix
# The 3x3 `matrix` above is singular (row 3 == 2 * row 2 - row 1), so it has no
# inverse; demonstrate inversion on a small non-singular matrix instead.
matrix_invertible = np.array([[1, 4],
                              [2, 5]])
np.linalg.inv(matrix_invertible)

# -----------------------------------------------------------------------------------------------------------
# Generating random values (seeded so the output is reproducible)
np.random.seed(0)
np.random.random(3)
np.random.randint(0, 11, 3)

# Draw numbers from a normal distribution with mean=0 and std=1
np.random.normal(0.0, 1.0, 5)

# Draw numbers from a logistic distribution with mean=0 and scale of 1.0
np.random.logistic(0.0, 1.0, 5)

# Draw numbers greater than or equal to 1.0 and less than 2.0
np.random.uniform(1.0, 2.0, 5)

In [35]:
# Printing a sparse matrix lists only its stored (non-zero) entries as "(row, column) value"
print(matrix_sparse)

  (0, 2)	1
  (1, 1)	1
  (2, 0)	5
  (2, 1)	3


In [None]:
# -------------------- Chapter 2: Loading Data --------------------
import pandas as pd
from sklearn import datasets
from sqlalchemy import create_engine

# Load a bundled toy dataset
digits = datasets.load_digits()
features = digits.data
target = digits.target
# Other bundled toy datasets: load_iris, load_diabetes
# (load_boston was removed in scikit-learn 1.2).

# Load data from SQL
# A relative SQLite file path needs three slashes: sqlite:///<relative path>
database_connection = create_engine('sqlite:///sample.db')
df = pd.read_sql_query('SELECT * FROM data', database_connection)

In [None]:
# ------------------- Chapter 3: Data Wrangling -------------------
import numpy as np
import pandas as pd

# Replace values with regular expressions
# (returns a new frame; `df` itself is left unchanged)
df.replace(r"[0-9]", "", regex=True)

# Grouping rows by time
# One timestamp every 30 seconds ('305' is not a valid frequency alias)
time_index = pd.date_range('06/06/2017', periods=100000, freq='30s')
df = pd.DataFrame(index=time_index)
# Give the frame a numeric column so the weekly sum has something to aggregate;
# a local generator keeps the global NumPy random state untouched.
df['Sale_Amount'] = np.random.default_rng(0).integers(1, 10, 100000)

# Group rows by week
df.resample('W').sum()

In [None]:
# -------------------- Chapter 4: Handling Numerical Data --------------------
import numpy as np
from sklearn import preprocessing
from sklearn.covariance import EllipticEnvelope

# Example feature: a single numeric column
feature = np.array([[-500.5],
                    [-100.1],
                    [0],
                    [100.1],
                    [900.9]])

# Rescale the feature to the [0, 1] range
minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaled_feature = minmax_scale.fit_transform(feature)

# Standardize to zero mean and unit variance
scaler = preprocessing.StandardScaler()

# Scale with median and interquartile range (better when outliers are present)
robust_scaler = preprocessing.RobustScaler()

# Normalizing observations: Normalizer rescales the values of each individual observation
normalizer = preprocessing.Normalizer(norm="l2")

# PolynomialFeatures
interaction = preprocessing.PolynomialFeatures(degree=2)


# Transforming features (like pandas apply)
def add_ten(x):
    """Return the input array with 10 added to every value."""
    return x + 10


feature_transform = preprocessing.FunctionTransformer(add_ten)

# Detect outliers
outlier_detector = EllipticEnvelope(contamination=.1)
def indices_of_outliers(x):
    """Locate the values of `x` that fall outside the 1.5 * IQR fences.

    Parameters
    ----------
    x : array-like of numbers.

    Returns
    -------
    tuple of ndarray
        The result of ``np.where``: one index array per dimension of `x`,
        pointing at values above ``q3 + 1.5 * iqr`` or below ``q1 - 1.5 * iqr``.
    """
    x = np.asarray(x)  # allow plain lists as well as arrays
    q1, q3 = np.percentile(x, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (iqr * 1.5)
    upper_bound = q3 + (iqr * 1.5)
    return np.where((x > upper_bound) | (x < lower_bound))

# Imputing missing values
import numpy as np
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.preprocessing import StandardScaler

# Small standardized example with one missing value
standardized_features = StandardScaler().fit_transform(
    np.array([[1.0, 2.0],
              [2.0, 4.1],
              [3.0, 5.9],
              [4.0, 8.2],
              [5.0, 9.8],
              [6.0, 12.1]]))
standardized_features[0, 0] = np.nan

# Missing values using KNN: each gap is filled from the 5 nearest observations
features_knn_imputed = KNNImputer(n_neighbors=5).fit_transform(standardized_features)

# Missing values using the column mean
# (SimpleImputer replaces the removed Imputer class and always works column-wise)
mean_imputer = SimpleImputer(strategy="mean")

In [None]:
# -------------------------- Chapter 5: Handling Categorical Data --------------------------
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer

# Example nominal feature
feature = np.array([["Texas"],
                    ["California"],
                    ["Texas"],
                    ["Delaware"],
                    ["Texas"]])

# Encoding nominal categorical features
one_hot = LabelBinarizer()
one_hot.fit_transform(feature)

# Reverse the one-hot encoding
one_hot.inverse_transform(one_hot.fit_transform(feature))

# Using pandas get_dummies
pd.get_dummies(feature[:, 0])

# Encoder for observations that carry several classes at once
one_hot_multiclass = MultiLabelBinarizer()
# --------------------------------------------------------------------------------------
# Imputing missing class values
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Feature matrix whose first column is the class label
X = np.array([[0, 2.10, 1.45],
              [1, 1.18, 1.33],
              [0, 1.22, 1.27],
              [1, -0.21, -1.19]])

# Observations whose class label is missing
X_with_nan = np.array([[np.nan, 0.87, 1.31],
                       [np.nan, -0.67, -0.22]])

# Train a KNN classifier that predicts the label (column 0) from the other columns
clf = KNeighborsClassifier(3, weights='distance')
trained_model = clf.fit(X[:, 1:], X[:, 0])

# Predict the missing labels and put them back in front of their features
imputed_values = trained_model.predict(X_with_nan[:, 1:])
X_with_imputed = np.hstack((imputed_values.reshape(-1, 1), X_with_nan[:, 1:]))

# Stack the imputed rows on top of the fully observed rows
np.vstack((X_with_imputed, X))

In [None]:
# ---------------------------- Chapter 9: Dimensionality Reduction Using Feature Extraction ----------------------------
from sklearn import datasets
from sklearn.datasets import make_circles
from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import StandardScaler

# Reducing features using principal components
digits = datasets.load_digits()
features = StandardScaler().fit_transform(digits.data)
# A float n_components keeps as many components as needed to retain 99% of the variance
pca = PCA(n_components=0.99, whiten=True)
features_pca = pca.fit_transform(features)
print("Original number of features", features.shape[1])
print("Reduced number of features", features_pca.shape[1])
# PCA principle: http://www.math.union.edu/~jaureguj/PCA.pdf

# Using an extension of PCA (kernel PCA) on linearly inseparable data
# The circles must be bound to `features`; otherwise KernelPCA is fitted on the digits data.
features, _ = make_circles(n_samples=1000, random_state=1, noise=0.1, factor=0.1)
kpca = KernelPCA(kernel="rbf", gamma=15, n_components=1)
features_kpca = kpca.fit_transform(features)
print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_kpca.shape[1])
# https://sebastianraschka.com/Articles/2014_kernel_pca.html
# -------------------------------------------------------------------------------------------------------------
# Reducing features by maximizing class separability
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

iris = datasets.load_iris()
features = iris.data
target = iris.target
lda = LinearDiscriminantAnalysis(n_components=1)
features_lda = lda.fit(features, target).transform(features)
lda.explained_variance_ratio_

# Choose n_components
# Refit with n_components=None to get the variance ratio of every component.
# The fitted estimator is not assigned to `features_lda`, which holds the reduced features.
lda = LinearDiscriminantAnalysis(n_components=None)
lda.fit(features, target)
lda_var_ratios = lda.explained_variance_ratio_
def select_n_components(var_ratio, goal_var: float) -> int:
    """Count the leading components needed to reach a target explained variance.

    Parameters
    ----------
    var_ratio : iterable of float
        Explained-variance ratio of each component, in the order they are kept.
    goal_var : float
        Cumulative explained variance to reach.

    Returns
    -------
    int
        Number of components consumed when the running total first reaches
        `goal_var`; the total number of components if it is never reached.
    """
    total_variance = 0.0
    n_components = 0
    for explained_variance in var_ratio:
        total_variance += explained_variance
        n_components += 1
        # Compare against the parameter (the original referenced an undefined name)
        if total_variance >= goal_var:
            break
    return n_components
select_n_components(lda_var_ratios, 0.95)

# Reducing features using matrix factorization (NMF needs non-negative features)
from sklearn.decomposition import NMF
from sklearn import datasets
digits = datasets.load_digits()
features = digits.data
nmf = NMF(n_components=10, random_state=1)
features_nmf = nmf.fit_transform(features)

# Reducing features on sparse data
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
from sklearn import datasets
import numpy as np
digits = datasets.load_digits()
# Standardize the feature matrix itself (digits.data), not the dataset container
features = StandardScaler().fit_transform(digits.data)
features_sparse = csr_matrix(features)
tsvd = TruncatedSVD(n_components=10)
features_sparse_tsvd = tsvd.fit(features_sparse).transform(features_sparse)

In [None]:
# ------------------------------- Chapter 10: Dimensionality Reduction Using Feature Selection -------------------------------
# Thresholding numerical feature variance
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.feature_selection import VarianceThreshold

iris = datasets.load_iris()
features = iris.data
target = iris.target
thresholder = VarianceThreshold(threshold=.5)
features_high_variance = thresholder.fit_transform(features)
features_high_variance[0:3]

# Handling highly correlated features
dataframe = pd.DataFrame(features)
corr_matrix = dataframe.corr().abs()
# Keep only the strict upper triangle so every pair of features is inspected once
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
# Columns whose absolute correlation with an earlier column exceeds 0.95
to_drop = [column for column in upper.columns if (upper[column] > 0.95).any()]
# Remove the correlated features
dataframe.drop(columns=to_drop)
# Removing irrelevant features for classification
from sklearn import datasets
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif
iris = datasets.load_iris()
features = iris.data
target = iris.target
# Cast to integers so the features are treated as categorical counts for chi2
features = features.astype(int)
chi2_selector = SelectKBest(chi2, k=2)
features_kbest = chi2_selector.fit_transform(features, target)

# Using SelectPercentile: keep the top 75% of features ranked by ANOVA F-value
from sklearn.feature_selection import SelectPercentile
fvalue_selector = SelectPercentile(f_classif, percentile=75)
features_kbest = fvalue_selector.fit_transform(features, target)
# Recursively eliminating features
import warnings
from sklearn.datasets import make_regression
from sklearn.feature_selection import RFECV
from sklearn import datasets, linear_model
# Silence a known, harmless SciPy warning raised by the least-squares solver
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
# random_state makes the generated data (and therefore the selection) reproducible
features, target = make_regression(n_samples=10000, n_features=100, n_informative=2,
                                   random_state=1)
ols = linear_model.LinearRegression()
# Drop one feature per step; cross-validated negative MSE decides how many to keep
rfecv = RFECV(estimator=ols, step=1, scoring="neg_mean_squared_error")
rfecv.fit(features, target)
rfecv.transform(features)
# Number of features kept
rfecv.n_features_
# Ranking of every feature (1 = selected)
rfecv.ranking_
# https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html#sphx-glr-auto-examples-feature-selection-plot-rfe-with-cross-validation-py

In [None]:
# ----------------------------------- Chapter 11: Model Evaluation -----------------------------------
# Cross-validating models
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
digits = datasets.load_digits()
features = digits.data
target = digits.target
standardizer = StandardScaler()
lr = LogisticRegression()
# The pipeline refits the scaler inside every fold, so no test-fold statistics leak into training
pipeline = make_pipeline(standardizer, lr)
kf = KFold(n_splits=10, shuffle=True, random_state=1)
cv_results = cross_val_score(pipeline, features, target,
                             cv=kf,
                             scoring="accuracy",  # evaluation metric
                             n_jobs=-1)           # use all CPU cores
cv_results.mean()
# Use StratifiedKFold for an imbalanced target vector, for example 80% male and 20% female
# Evaluating binary classifier thresholds
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split
features, target = make_classification(n_samples=100000, n_features=10, n_classes=2, n_informative=3, random_state=3)
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.1, random_state=1)
lr = LogisticRegression()
lr.fit(features_train, target_train)
# Predicted probability of the positive class
target_proba = lr.predict_proba(features_test)[:, 1]
false_positive_rate, true_positive_rate, threshold = roc_curve(target_test, target_proba)
plt.title("Receiver operating characteristic")
plt.plot(false_positive_rate, true_positive_rate)
# Diagonal reference line
plt.plot([0, 1], ls="--")
# Grey frame lines
plt.plot([0, 0], [1, 0], c=".7")
plt.plot([1, 1], c=".7")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
# https://community.alteryx.com/t5/Data-Science/ROC-Curves-in-Python-and-R/ba-p/138430
# http://gim.unmc.edu/dxtests/roc3.htm
# Evaluating multiclass classifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
# n_informative + n_redundant may not exceed n_features, so with 3 informative
# features out of 3 there is no room for redundant ones.
features, target = make_classification(n_samples=10000, n_features=3, n_classes=3, n_informative=3, n_redundant=0, random_state=1)
lr = LogisticRegression()
cross_val_score(lr, features, target, scoring="accuracy")
# Visualizing a classifier's performance
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
iris = datasets.load_iris()
features = iris.data
target = iris.target
class_names = iris.target_names
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.2, random_state=1)
classifier = LogisticRegression()
target_predicted = classifier.fit(features_train, target_train).predict(features_test)
# Rows are the true classes, columns the predicted classes
matrix = confusion_matrix(target_test, target_predicted)
dataframe = pd.DataFrame(matrix, index=class_names, columns=class_names)
sns.heatmap(dataframe, annot=True, cbar=None, cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted Class")
plt.ylabel("True Class")
# Adjust the layout after the labels exist so they are taken into account
plt.tight_layout()
plt.show()

# Evaluating regression
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Regression data and model to score
features, target = make_regression(n_samples=100, n_features=3, n_informative=3,
                                   noise=50, random_state=1)
ols = LinearRegression()

# MSE for the regression model
# scikit-learn scorers are "higher is better", so MSE is reported negated (neg_mse = -MSE)
cross_val_score(ols, features, target, scoring="neg_mean_squared_error")

# Coefficient of determination
# https://en.wikipedia.org/wiki/Coefficient_of_determination
cross_val_score(ols, features, target, scoring="r2")

# Evaluating clustering
from sklearn.metrics import silhouette_score
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
features, _ = make_blobs(n_samples=1000, n_features=10, centers=3, cluster_std=.5, shuffle=True, random_state=1)
model = KMeans(n_clusters=3, random_state=1).fit(features)
# Cluster label assigned to each observation
target_predicted = model.labels_
silhouette_score(features, target_predicted)
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.silhouette_score.html#sklearn.metrics.silhouette_score


In [None]:
# ------------------------------- Chapter 12: Model Selection -------------------------------
# Select models using exhaustive search
import numpy as np
from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
features = iris.data
target = iris.target
# The default solver does not support the l1 penalty; 'saga' handles both l1 and l2.
# max_iter is raised because saga may need many iterations on unscaled features.
lr = linear_model.LogisticRegression(solver="saga", max_iter=5000)
penalty = ['l1', 'l2']
C = np.logspace(0, 4, 10)
hyperparameters = dict(C=C, penalty=penalty)
gridsearch = GridSearchCV(lr, hyperparameters, cv=5, verbose=0)
best_model = gridsearch.fit(features, target)
best_model.best_estimator_.get_params()

# Select models using randomized search
from scipy.stats import uniform
from sklearn import linear_model, datasets
from sklearn.model_selection import RandomizedSearchCV
iris = datasets.load_iris()
features = iris.data
target = iris.target
# 'saga' supports both the l1 and l2 penalties (the default solver rejects l1)
lr = linear_model.LogisticRegression(solver="saga", max_iter=5000)
penalty = ['l1', 'l2']
# C is sampled from a uniform distribution on [0, 4]
C = uniform(loc=0, scale=4)
hyperparameters = dict(C=C, penalty=penalty)
# The parameter distributions must be passed as the second argument
randomizedsearch = RandomizedSearchCV(lr, hyperparameters, random_state=1, n_iter=100, cv=5, verbose=0, n_jobs=-1)
best_model = randomizedsearch.fit(features, target)
# https://www.jmlr.org/papers/volume13/bergstra12a/bergstra12a.pdf

# Select models from multiple learning algorithms
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
np.random.seed(0)
iris = datasets.load_iris()
features = iris.data
target = iris.target
# The step name must match the "classifier" prefix used in the search space;
# the estimator given here is only a placeholder that the search replaces.
pipe = Pipeline([("classifier", RandomForestClassifier())])
# Nested parameters use "<step>__<parameter>" with a double underscore
search_space = [{"classifier": [LogisticRegression(solver="saga", max_iter=5000)],
                 "classifier__penalty": ['l1', 'l2'],
                 "classifier__C": np.logspace(0, 4, 10)},
                {"classifier": [RandomForestClassifier()],
                 "classifier__n_estimators": [10, 100, 1000],
                 "classifier__max_features": [1, 2, 3]}]
gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=0)
best_model = gridsearch.fit(features, target)
best_model.best_estimator_.get_params()['classifier']

# Select models when preprocessing
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
np.random.seed(0)
iris = datasets.load_iris()
features = iris.data
target = iris.target
# Concatenate the standardized features with their principal components
preprocess = FeatureUnion([("std", StandardScaler()), ("pca", PCA())])
pipe = Pipeline([("preprocess", preprocess),
                 ("classifier", LogisticRegression(solver="saga", max_iter=5000))])
# Nested parameters use "<step>__<parameter>" with a double underscore
search_space = [{"preprocess__pca__n_components": [1, 2, 3],
                 "classifier__penalty": ['l1', 'l2'],
                 "classifier__C": np.logspace(0, 4, 10)}]
clf = GridSearchCV(pipe, search_space, cv=5, verbose=0, n_jobs=-1)
best_model = clf.fit(features, target)

#Evaluating Performance after model selection
# The search object itself is the estimator being cross-validated here, so the
# hyperparameter search is rerun inside every outer fold.
"""Conduct nested cross-validation and output the average score"""
cross_val_score(gridsearch, features, target).mean()

In [None]:
# ----------------------------- Chapter 13: Linear Regression -----------------------------
# Fitting a line
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = load_diabetes()
# Use only the first two features
features = diabetes.data[:, 0:2]
target = diabetes.target
lr = LinearRegression()
model = lr.fit(features, target)
model.intercept_
model.coef_

# Handling interactive effects
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import PolynomialFeatures
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = load_diabetes()
features = diabetes.data[:, 0:2]
target = diabetes.target
# interaction_only=True adds products of distinct features but no powers of a single feature
interaction = PolynomialFeatures(degree=3, include_bias=False, interaction_only=True)
features_interaction = interaction.fit_transform(features)
lr = LinearRegression()
model = lr.fit(features_interaction, target)

# Fitting a non-linear relationship
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import PolynomialFeatures
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = load_diabetes()
# Keep a single feature, still as a 2-D column
features = diabetes.data[:, 0:1]
target = diabetes.target
# Adds x^2 and x^3 so the linear model can fit a cubic curve
polynomial = PolynomialFeatures(degree=3, include_bias=False)
features_polynomial = polynomial.fit_transform(features)
lr = LinearRegression()
model = lr.fit(features_polynomial, target)

# Reduce variance with regularization
from sklearn.linear_model import Ridge
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = load_diabetes()
features = diabetes.data
target = diabetes.target
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
lr = Ridge(alpha=0.5)
model = lr.fit(features_standardized, target)

# Select alpha by cross-validation over the candidate values
from sklearn.linear_model import RidgeCV
regr_cv = RidgeCV(alphas=[0.1, 1.0, 10.0])
model_cv = regr_cv.fit(features_standardized, target)
# Best alpha found
model_cv.alpha_

# Reduce features with Lasso
from sklearn.linear_model import Lasso
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = load_diabetes()
features = diabetes.data
target = diabetes.target
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
# The L1 penalty can shrink coefficients exactly to zero, removing those features from the model
lr = Lasso(alpha=0.5)
model = lr.fit(features_standardized, target)


In [None]:
# ------------------------------------ Chapter 14: Trees and Forests ------------------------------------
# DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
import pydotplus
from IPython.display import Image
from sklearn import tree
iris = datasets.load_iris()
features = iris.data
target = iris.target
dt = DecisionTreeClassifier(criterion='entropy', random_state=0)
model = dt.fit(features, target)
# Export the fitted tree as DOT text (out_file=None returns the text) and render it as a PNG
dot_data = tree.export_graphviz(dt, out_file=None, feature_names=iris.feature_names, class_names=iris.target_names)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())
# DecisionTreeRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import datasets
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = datasets.load_diabetes()
features = diabetes.data[:, 0:2]
target = diabetes.target
# 'absolute_error' is the current name of the former 'mae' criterion
dt = DecisionTreeRegressor(criterion='absolute_error', random_state=0)
model = dt.fit(features, target)

# RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
iris = datasets.load_iris()
features = iris.data
target = iris.target
rf = RandomForestClassifier(criterion="entropy", random_state=0, n_jobs=-1)
model = rf.fit(features, target)

# RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import datasets
# load_boston was removed in scikit-learn 1.2; load_diabetes is a bundled regression dataset
diabetes = datasets.load_diabetes()
# Must be bound to `features`: otherwise the iris features above are fitted against this target
features = diabetes.data[:, 0:2]
target = diabetes.target
rf = RandomForestRegressor(random_state=0, n_jobs=-1)
model = rf.fit(features, target)

# Identify important features in random forests
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
iris = datasets.load_iris()
features = iris.data
target = iris.target
rf = RandomForestClassifier(random_state=0, n_jobs=-1)
model = rf.fit(features, target)
importances = model.feature_importances_
# Feature indices sorted from most to least important
indices = np.argsort(importances)[::-1]
names = [iris.feature_names[i] for i in indices]
plt.figure()
plt.title("Feature Importance")
plt.bar(range(features.shape[1]), importances[indices])
# Label each bar with its feature name
plt.xticks(range(features.shape[1]), names, rotation=90)
plt.show()

#Select important features
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
from sklearn.feature_selection import SelectFromModel
iris = datasets.load_iris()
features = iris.data
target = iris.target
rf = RandomForestClassifier(random_state=0, n_jobs=-1)
# Selector driven by the forest's feature importances, with a cut-off of 0.3
selector = SelectFromModel(rf, threshold=0.3)
features_important = selector.fit_transform(features, target)
# Refit the forest on the reduced feature matrix
model = rf.fit(features_important, target)

# Improve performance through boosting
from sklearn.ensemble import AdaBoostClassifier
from sklearn import datasets
iris = datasets.load_iris()
features = iris.data
target = iris.target
adaboost = AdaBoostClassifier(random_state=0)
# Train the boosted ensemble (the estimator was previously created but never fitted)
model = adaboost.fit(features, target)

# Evaluate random forest with out-of-bag errors
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
iris = datasets.load_iris()
features = iris.data
target = iris.target
# oob_score=True scores each tree on the observations left out of its bootstrap sample
rf = RandomForestClassifier(random_state=0, n_estimators=1000, oob_score=True, n_jobs=-1)
model = rf.fit(features, target)
rf.oob_score_

In [None]:
# -------------------------------- Chapter 15: K-Nearest Neighbors --------------------------------
# Observation's nearest neighbors
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
iris = datasets.load_iris()
features = iris.data
target = iris.target
# Standardize first: distance-based methods are sensitive to feature scale
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)
nearest_neighbors = NearestNeighbors(n_neighbors=2).fit(features_standardized)

# K-nearest neighbor classifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target
standardizer = StandardScaler()
X_std = standardizer.fit_transform(X)
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1).fit(X_std, y)
# Identify best neighborhood size
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
features = iris.data
target = iris.target
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
pipe = Pipeline([("standardizer", standardizer), ("knn", knn)])
# Candidate values of k, addressed as "<step>__<parameter>"
search_space = [{"knn__n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]
classifier = GridSearchCV(pipe, search_space, cv=5, verbose=0).fit(features_standardized, target)
# Best neighborhood size found by the search
classifier.best_estimator_.get_params()["knn__n_neighbors"]

# Radius-based nearest neighbor classifier
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
iris = datasets.load_iris()
features = iris.data
target = iris.target
standardizer = StandardScaler()
# fit_transform must be called on the instance, not on the StandardScaler class
features_standardized = standardizer.fit_transform(features)
rnn = RadiusNeighborsClassifier(radius=.5, n_jobs=-1).fit(features_standardized, target)

In [None]:
# ----------------------------------- Chapter 16: Logistic Regression -----------------------------------
# Binary classifier
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
iris = datasets.load_iris()
# Keep only the first 100 observations and their labels
features = iris.data[:100, :]
target = iris.target[:100]
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
lr = LogisticRegression(random_state=0)
model = lr.fit(features_standardized, target)

# Multiclass classifier
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
iris = datasets.load_iris()
features = iris.data
target = iris.target
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
# One-vs-rest: one binary model per class. The multi_class='ovr' argument of
# LogisticRegression is deprecated; OneVsRestClassifier is the replacement.
lr = OneVsRestClassifier(LogisticRegression(random_state=0))
model = lr.fit(features_standardized, target)

# Reduce variance through regularization
from sklearn.linear_model import LogisticRegressionCV
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
iris = datasets.load_iris()
features = iris.data
target = iris.target
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
# Cs=10 cross-validates over 10 candidate values of the regularization strength C
lr = LogisticRegressionCV(penalty='l2', Cs=10, random_state=0, n_jobs=-1)
model = lr.fit(features_standardized, target)

# Classifier on very large data
lr = LogisticRegression(random_state=0, solver="sag")

# Handling imbalanced classes
lr = LogisticRegression(random_state=0, class_weight="balanced")

In [None]:
# -------------------------------------- Chapter 17: Support Vector Machines --------------------------------------
# Linear classifier
from sklearn.svm import LinearSVC
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
iris = datasets.load_iris()
# First 100 observations and first two features, so the boundary can be drawn in 2-D
features = iris.data[:100, :2]
target = iris.target[:100]
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
svc = LinearSVC(C=1.0)
model = svc.fit(features_standardized, target)
# Plot the observations colored by class
color = ["black" if c == 0 else "lightgrey" for c in target]
plt.scatter(features_standardized[:, 0], features_standardized[:, 1], c=color)
# Decision boundary w0*x + w1*y + b = 0, rewritten as y = a*x - b/w1
w = svc.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-2.5, 2.5)
yy = a * xx - (svc.intercept_[0]) / w[1]
plt.plot(xx, yy)
plt.axis("off")
plt.show()

#Handle Linearly Inseparable Classes using Kernels
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np
# Seed the generator so the synthetic data is reproducible
np.random.seed(0)
# 200 observations with two normally distributed features
features = np.random.randn(200, 2)
# True where exactly one of the two features is positive (XOR pattern)
target_xor = np.logical_xor(features[:,0] > 0, features[:,1] > 0)
# Class 0 where the XOR condition holds, class 1 elsewhere
target = np.where(target_xor, 0, 1)
# RBF-kernel support vector classifier
svc = SVC(kernel="rbf", random_state=0, gamma=1, C=1)
model = svc.fit(features, target)


In [None]:
# ------------------------------------ Chapter 18: Naive Bayes ------------------------------------
# Training a classifier for continuous features
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
iris = datasets.load_iris()
features = iris.data
target = iris.target
classifier = GaussianNB()
model = classifier.fit(features, target)
# Gaussian naive Bayes is best used when all features are continuous

#Training a Classifier for Discrete and count features
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
# Three short documents used as the training corpus
text_data = np.array(['I love VN. VN!',
                     'VN is best',
                     'Brazil beats both'])
# Build a bag-of-words count matrix and convert it to a dense array
count = CountVectorizer()
bag_of_words = count.fit_transform(text_data)
features = bag_of_words.toarray()
# Class label of each document
target = np.array([0,0,1])
# NOTE(review): the two class priors sum to 0.75, not 1 - confirm this is intended
classifier = MultinomialNB(class_prior = [0.25,0.5])
model = classifier.fit(features, target)
"""Most common uses of multinomial naive Bayes is text classification"""

# Training a naive Bayes classifier for binary features
import numpy as np
from sklearn.naive_bayes import BernoulliNB
# 100 observations with three 0/1 features and a 0/1 target
features = np.random.randint(2, size=(100, 3))
target = np.random.randint(2, size=(100, 1)).ravel()
# NOTE(review): the two class priors sum to 0.75, not 1 - confirm this is intended
classifier = BernoulliNB(class_prior=[0.25, 0.5])
model = classifier.fit(features, target)
# The Bernoulli naive Bayes classifier assumes that all features are binary

#Calibrating predicted probabilities
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV
iris = datasets.load_iris()
features = iris.data
target = iris.target
classifier = GaussianNB()
# Wrap the naive Bayes model in a sigmoid calibrator using 2-fold cross-validation
classifier_sigmoid = CalibratedClassifierCV(classifier, cv=2, method='sigmoid')
classifier_sigmoid.fit(features, target)
# One new observation with four feature values, matching the iris feature count
new_observation = [[2.6, 2.6, 2.6, 0.4]]
# Calibrated class probabilities for the new observation
classifier_sigmoid.predict_proba(new_observation)


In [None]:
# ------------------------------------ Chapter 19: Clustering ------------------------------------
# Cluster using K-Means
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
iris = datasets.load_iris()
features = iris.data
scaler = StandardScaler()
features_std = scaler.fit_transform(features)
# KMeans no longer accepts n_jobs (removed in scikit-learn 1.0)
cluster = KMeans(n_clusters=3, random_state=0)
model = cluster.fit(features_std)
# Coordinates of the cluster centers
model.cluster_centers_

# Cluster using MeanShift
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import MeanShift
iris = datasets.load_iris()
features = iris.data
scaler = StandardScaler()
features_std = scaler.fit_transform(features)
# MeanShift does not take the number of clusters as an input
cluster = MeanShift(n_jobs=-1)
model = cluster.fit(features_std)

#Cluster using DBSCAN
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
iris = datasets.load_iris()
features = iris.data
# Standardize the features before clustering
scaler = StandardScaler()
features_std = scaler.fit_transform(features)
cluster = DBSCAN(n_jobs=-1)
model = cluster.fit(features_std)

#Cluster using Hierarchical merging
# `datasets` and `StandardScaler` are the names imported in the DBSCAN section above
from sklearn.cluster import AgglomerativeClustering
iris = datasets.load_iris()
features = iris.data
scaler = StandardScaler()
features_std = scaler.fit_transform(features)
# Agglomerative clustering configured for three clusters
cluster = AgglomerativeClustering(n_clusters=3)
model = cluster.fit(features_std)


In [None]:
# ------------------------------------ Chapter 20: Neural Networks ------------------------------------
# Preprocessing data for a neural network
from sklearn import preprocessing
import numpy as np
features = np.array([[-100.1, 3240.1],
                     [-200.2, -234.1],
                     [5000.5, 150.1],
                     [6000.6, -125.1],
                     [9000.9, -673.1]])
# Standardize every feature to zero mean and unit variance
scaler = preprocessing.StandardScaler()
features_standardized = scaler.fit_transform(features)

# Designing a neural network
from keras import models
from keras import layers
network = models.Sequential()
# Two hidden layers of 16 ReLU units; the input has 10 features
network.add(layers.Dense(units=16, activation="relu", input_shape=(10,)))
network.add(layers.Dense(units=16, activation="relu"))
# Single sigmoid output unit for binary classification
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
# Binary classification: one unit with a sigmoid activation function and binary cross-entropy
# Multiclass classification: k (number of target classes) units with a softmax activation function and categorical cross-entropy
# Regression: one unit with no activation function and mean squared error

# Training a binary classifier
import numpy as np
from keras.datasets import imdb
# NOTE(review): keras.preprocessing.text may be unavailable in recent Keras releases - confirm for the installed version
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
np.random.seed(0)
number_of_features = 1000
(data_train, target_train), (data_test, target_test) = imdb.load_data(num_words=number_of_features)
# Turn each review (a sequence of word indices) into a binary feature vector
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")
network = models.Sequential()
network.add(layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)))
network.add(layers.Dense(units=16, activation="relu"))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
history = network.fit(features_train, target_train, epochs=3, verbose=1, batch_size=100, validation_data=(features_test, target_test))
predicted_target = network.predict(features_test)

# Training a Multiclass Classifier
import numpy as np
from keras import layers
from keras import models
from keras.datasets import reuters
from keras.preprocessing.text import Tokenizer
from keras.utils.np_utils import to_categorical

np.random.seed(0)
number_of_features = 5000

# Load the Reuters data, limited to the `number_of_features` most frequent words.
data = reuters.load_data(num_words=number_of_features)
(data_train, target_vector_train), (data_test, target_vector_test) = data

# Convert the integer sequences into binary feature matrices.
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")

# One-hot encode the class labels for categorical cross-entropy.
target_train = to_categorical(target_vector_train)
target_test = to_categorical(target_vector_test)

# Two hidden ReLU layers and a 46-unit softmax output layer.
network = models.Sequential()
network.add(layers.Dense(units=100, activation="relu", input_shape=(number_of_features,)))
network.add(layers.Dense(units=100, activation="relu"))
network.add(layers.Dense(units=46, activation="softmax"))
network.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

# Train silently, validating on the test split after each epoch.
history = network.fit(
    features_train,
    target_train,
    epochs=3,
    verbose=0,
    batch_size=100,
    validation_data=(features_test, target_test),
)
predicted_target = network.predict(features_test)

# Training a Regressor
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

np.random.seed(0)

# Generate a noiseless synthetic regression problem with three features.
features, target = make_regression(n_samples=10000, n_features=3, n_informative=3, n_targets=1, noise=0.0, random_state=0)

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.33, random_state=0)

# Two hidden ReLU layers and a single output unit with no activation.
network = models.Sequential()
network.add(layers.Dense(units=32, activation="relu", input_shape=(features_train.shape[1],)))
network.add(layers.Dense(units=32, activation="relu"))
network.add(layers.Dense(units=1))
network.compile(loss="mse", optimizer="RMSprop", metrics=['mse'])

# Train silently, validating on the held-out split after each epoch.
history = network.fit(features_train, target_train, epochs=10, verbose=0, batch_size=100, validation_data=(features_test, target_test))
predicted_target = network.predict(features_test)

# Visualize Training History
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
import matplotlib.pyplot as plt

np.random.seed(0)
number_of_features = 10000

# Load the IMDB data, keeping only the `number_of_features` most frequent words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(num_words=number_of_features)

# Convert the integer sequences into binary feature matrices.
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")

# Two hidden ReLU layers followed by a single sigmoid output unit.
network = models.Sequential()
network.add(layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)))
network.add(layers.Dense(units=16, activation="relu"))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
history = network.fit(features_train, target_train, epochs=15, verbose=0, batch_size=1000, validation_data=(features_test, target_test))

# Plot training vs. test loss per epoch.
training_loss = history.history["loss"]
test_loss = history.history["val_loss"]
epoch_count = range(1, len(training_loss) + 1)
plt.plot(epoch_count, training_loss, "r--")
plt.plot(epoch_count, test_loss, "b-")
plt.legend(["Training Loss", "Test Loss"])
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()

# Older Keras releases store the metric under "acc"/"val_acc", newer ones
# under "accuracy"/"val_accuracy"; use whichever key this run produced.
accuracy_key = "acc" if "acc" in history.history else "accuracy"
training_accuracy = history.history[accuracy_key]
test_accuracy = history.history["val_" + accuracy_key]

# Plot training vs. test accuracy per epoch.
plt.plot(epoch_count, training_accuracy, "r--")
plt.plot(epoch_count, test_accuracy, "b-")
plt.legend(["Training Accuracy", "Test Accuracy"])
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.show()

# Reducing Overfitting with Weight Regularization
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
from keras import regularizers

np.random.seed(0)
number_of_features = 1000

# Load the IMDB data, keeping only the `number_of_features` most frequent words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(num_words=number_of_features)

# Convert the integer sequences into binary feature matrices.
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")

# Both hidden layers apply an L2 penalty (factor 0.01) to their kernel weights.
network = models.Sequential()
network.add(layers.Dense(units=16, activation="relu", kernel_regularizer=regularizers.l2(0.01), input_shape=(number_of_features,)))
network.add(layers.Dense(units=16, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=['accuracy'])

# Train silently, validating on the test split after each epoch.
history = network.fit(features_train, target_train, epochs=3, verbose=0, batch_size=100, validation_data=(features_test, target_test))

# Reducing Overfitting with Early Stopping
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
from keras.callbacks import EarlyStopping, ModelCheckpoint

np.random.seed(0)
number_of_features = 1000

# Load the IMDB data, keeping only the `number_of_features` most frequent words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(num_words=number_of_features)

# Convert the integer sequences into binary feature matrices.
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")

# Two hidden ReLU layers followed by a single sigmoid output unit.
network = models.Sequential()
network.add(layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)))
network.add(layers.Dense(units=16, activation="relu"))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=['accuracy'])

# Stop once validation loss has failed to improve for two epochs, and keep
# only the best-scoring model on disk.
callbacks = [
    EarlyStopping(monitor="val_loss", patience=2),
    ModelCheckpoint(filepath="best_model.h5", monitor="val_loss", save_best_only=True),
]
history = network.fit(features_train, target_train, epochs=20, callbacks=callbacks, verbose=0, batch_size=100, validation_data=(features_test, target_test))

# Reducing Overfitting with Dropout
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers

np.random.seed(0)
number_of_features = 1000

# Load the IMDB data, keeping only the `number_of_features` most frequent words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(num_words=number_of_features)

# Convert the integer sequences into binary feature matrices.
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")

# Dropout at rate 0.2 on the inputs and 0.5 after each hidden layer.
network = models.Sequential()
network.add(layers.Dropout(0.2, input_shape=(number_of_features,)))
network.add(layers.Dense(units=16, activation="relu"))
network.add(layers.Dropout(0.5))
network.add(layers.Dense(units=16, activation="relu"))
network.add(layers.Dropout(0.5))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=['accuracy'])

# No callbacks are passed: this recipe defines none of its own, so the fit
# must not depend on a `callbacks` list left in the kernel by another cell.
history = network.fit(features_train, target_train, epochs=3, verbose=0, batch_size=100, validation_data=(features_test, target_test))

# K-Fold Validating Neural Networks
import numpy as np
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification

np.random.seed(0)
number_of_features = 100

# Generate a balanced synthetic two-class problem.
features, target = make_classification(n_samples=10000, n_features=number_of_features, n_informative=3, n_redundant=0, n_classes=2, weights=[.5,.5], random_state=0)


def create_network():
    """Build and compile a binary classifier for `number_of_features` inputs.

    Returns:
        A compiled Sequential model with two 16-unit ReLU hidden layers and a
        single sigmoid output unit.
    """
    network = models.Sequential()
    network.add(layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)))
    network.add(layers.Dense(units=16, activation="relu"))
    network.add(layers.Dense(units=1, activation="sigmoid"))
    network.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
    return network


# Wrap the Keras model so scikit-learn can cross-validate it.
neural_network = KerasClassifier(build_fn=create_network, epochs=10, batch_size=100, verbose=0)
cross_val_score(neural_network, features, target, cv=3)

# Tuning Neural Network
import numpy as np
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification

np.random.seed(0)
number_of_features = 100

# Generate a balanced synthetic two-class problem.
features, target = make_classification(n_samples=10000, n_features=number_of_features, n_informative=3, n_redundant=0, n_classes=2, weights=[.5, .5], random_state=0)


def create_network(optimizer="rmsprop"):
    """Build and compile a binary classifier for `number_of_features` inputs.

    Args:
        optimizer: Optimizer passed to `compile`.

    Returns:
        A compiled Sequential model with two 16-unit ReLU hidden layers and a
        single sigmoid output unit.
    """
    network = models.Sequential()
    network.add(layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)))
    network.add(layers.Dense(units=16, activation="relu"))
    network.add(layers.Dense(units=1, activation="sigmoid"))
    network.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return network


neural_network = KerasClassifier(build_fn=create_network, verbose=0)

# Candidate values for the grid search.
epochs = [5,10]
batches = [5,10,100]
optimizers = ["rmsprop","adam"]

# The key must be "optimizer" so it matches the create_network() argument
# that the wrapper forwards the value to.
hyperparameters = dict(optimizer=optimizers, epochs=epochs, batch_size=batches)
grid = GridSearchCV(estimator=neural_network, param_grid=hyperparameters)
grid_result = grid.fit(features, target)
grid_result.best_params_
            
# Visualizing Neural Network
from keras import models
from keras import layers
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

# Two hidden ReLU layers followed by a single sigmoid output unit.
network = models.Sequential()
network.add(layers.Dense(units=16, activation="relu", input_shape=(10,)))
network.add(layers.Dense(units=16, activation="relu"))
network.add(layers.Dense(units=1, activation="sigmoid"))

# Build an SVG diagram of the architecture, then also write it to a PNG file.
SVG(model_to_dot(network, show_shapes=True).create(prog="dot", format="svg"))
plot_model(network, show_shapes=True, to_file="network.png")

In [1]:
# Classifying Images
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K

# Put the color channel first: arrays are (samples, channels, height, width).
K.set_image_data_format("channels_first")
np.random.seed(0)
channels = 1
height = 28
width = 28

# mnist.load_data() returns (x_train, y_train), (x_test, y_test).
(data_train, target_train), (data_test, target_test) = mnist.load_data()

# Add the explicit channel axis and rescale pixel values by 1/255.
data_train = data_train.reshape(data_train.shape[0], channels, height, width)
data_test = data_test.reshape(data_test.shape[0], channels, height, width)
features_train = data_train / 255
features_test = data_test / 255

# One-hot encode the labels.
target_train = np_utils.to_categorical(target_train)
target_test = np_utils.to_categorical(target_test)
number_of_classes = target_test.shape[1]

# Convolution -> max pooling -> dropout -> dense -> dropout -> softmax.
network = Sequential()
network.add(Conv2D(filters=64, kernel_size=(5,5), input_shape=(channels, width, height), activation="relu"))
network.add(MaxPooling2D(pool_size=(2,2)))
network.add(Dropout(0.5))
network.add(Flatten())
network.add(Dense(128, activation="relu"))
network.add(Dropout(0.5))
network.add(Dense(number_of_classes, activation="softmax"))
network.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['accuracy'])
network.fit(features_train, target_train, epochs=2, verbose=0, batch_size=1000, validation_data=(features_test, target_test))
# Convolutional neural networks are a popular type of network that is
# effective at computer vision.


# Improving Performance with Image Augmentation
from keras.preprocessing.image import ImageDataGenerator

# Randomly zoom, shift, flip and rotate the images as they are loaded.
augmentation = ImageDataGenerator(featurewise_center=True, zoom_range=0.3, width_shift_range=0.2, horizontal_flip=True, rotation_range=90)

# Stream batches of 32 images from "raw/images" and save the augmented
# copies to "processed/images".
augment_images = augmentation.flow_from_directory("raw/images", batch_size=32, class_mode="binary", save_to_dir="processed/images")

# NOTE(review): `network` is not built in this recipe, and
# `augment_images_test` is not defined anywhere in this notebook. Create a
# validation generator (and a model that matches these images) before
# running this line.
network.fit_generator(augment_images, steps_per_epoch=2000, epochs=5, validation_data=augment_images_test, validation_steps=800)

ModuleNotFoundError: No module named 'keras'

In [None]:
# Classifying Text
import numpy as np
from keras import layers
from keras import models
from keras.datasets import imdb
from keras.preprocessing import sequence

np.random.seed(0)
number_of_features = 1000

# Load the IMDB data, keeping only the `number_of_features` most frequent words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(
    num_words=number_of_features
)

# Pad or truncate every sequence to a length of 400.
features_train = sequence.pad_sequences(data_train, maxlen=400)
features_test = sequence.pad_sequences(data_test, maxlen=400)

# Embedding layer -> LSTM layer -> single sigmoid output unit.
network = models.Sequential()
network.add(layers.Embedding(input_dim=number_of_features, output_dim=128))
network.add(layers.LSTM(units=128))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(
    loss="binary_crossentropy",
    optimizer="Adam",
    metrics=['accuracy'],
)

# Train silently, validating on the test split after each epoch.
history = network.fit(
    features_train,
    target_train,
    epochs=3,
    verbose=0,
    batch_size=1000,
    validation_data=(features_test, target_test),
)

In [None]:
------------------------------ #Chapter21: Saving and Loading Trained Model ---------------------------------------------
# Saving & Loading scikit-learn model
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets

# Newer scikit-learn releases no longer ship `sklearn.externals.joblib`;
# prefer the standalone package and fall back for old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Train a random forest on the iris data.
iris = datasets.load_iris()
features = iris.data
target = iris.target
classifier = RandomForestClassifier()
model = classifier.fit(features, target)

# Persist the fitted model to disk, then load it back.
joblib.dump(model, "model.pkl")
classifier = joblib.load("model.pkl")

# Saving & Loading a Keras Model
import numpy as np
from keras import layers
from keras import models
from keras.datasets import imdb
from keras.models import load_model
from keras.preprocessing.text import Tokenizer

np.random.seed(0)
number_of_features = 1000

# Load the IMDB data, keeping only the `number_of_features` most frequent words.
(train_data, train_target), (test_data, test_target) = imdb.load_data(
    num_words=number_of_features
)

# Convert the integer sequences into binary feature matrices.
tokenizer = Tokenizer(num_words=number_of_features)
train_features = tokenizer.sequences_to_matrix(train_data, mode="binary")
test_features = tokenizer.sequences_to_matrix(test_data, mode="binary")

# One hidden ReLU layer followed by a single sigmoid output unit.
network = models.Sequential()
network.add(layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)))
network.add(layers.Dense(units=1, activation="sigmoid"))
network.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

# Train silently, validating on the test split after each epoch.
history = network.fit(
    train_features,
    train_target,
    epochs=3,
    verbose=0,
    batch_size=100,
    validation_data=(test_features, test_target),
)

# Write the trained network to "model.h5", then reload it from that file.
network.save("model.h5")
network = load_model("model.h5")