# Module 

In [99]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split

import numpy as np

# Config 

In [100]:
# Shared hyperparameters for the two random forests built below.
max_depth = 2  # passed as `max_depth` to both forest constructors
random_state = 0  # passed as `random_state` (seed) to both forest constructors

In [101]:
# Load iris as plain (features, labels) arrays and hold out half of it for testing.
X, y = load_iris(return_X_y=True)
# Seed the split from the config cell's `random_state` rather than a second
# hard-coded 0, so changing the config keeps the split and the forests in sync.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=random_state
)

In [4]:
X.shape

(150, 4)

In [7]:
# Random-forest classifier; tree depth and seed both come from the config cell.
rfc = RandomForestClassifier(max_depth=max_depth, random_state=random_state)

In [8]:
rfc.fit(X_train, y_train)

RandomForestClassifier(max_depth=2, random_state=0)

In [9]:
rfc.score(X_test, y_test)

0.9066666666666666

In [10]:
# Random-forest regressor configured like the classifier above (same depth, same seed).
# It is later fit on the same integer iris labels as the classifier.
rfr = RandomForestRegressor(max_depth=max_depth, random_state=random_state)

In [11]:
rfr.fit(X_train, y_train)

RandomForestRegressor(max_depth=2, random_state=0)

In [42]:
rfr.score(X_test, y_test)

0.8996773794824336

# TensorFlow Catch-up 

## Packages 

In [155]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys
import logging
# Put the parent directory on the import path so the `src.*` imports
# used later in this notebook can be resolved.
base_dir = '../'
sys.path.append(base_dir)
# from src.Model.Recommender.DNN import DNN
from typing import List

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Config 

In [194]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
from src.BaseClass.DLModel import DLModel


TensorFlow version: 2.0.0


In [267]:


# Model config
# Layer widths for the main DNN (passed as `hidden_dim` / `dnn_dims` further down).
hidden_dim_lst = [96, 24, 8]
# Layer widths for the second ("position") DNN inside DNN_position.
position_dnn_dims = [24, 3]
# feature_dim = 30

## Data  

In [111]:
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

In [112]:
# Hold out 20% of the rows for testing. The shuffle is seeded with the
# notebook-level `random_state` so the split is identical on every run;
# without a seed each run evaluates on a different test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)

In [113]:
# X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=0.15)

In [114]:
print(f"{X_train.shape} {X_test.shape}")

(455, 30) (114, 30)


In [115]:
y_train.isna().sum()

0

In [116]:
X_train.isna().sum()

mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
dtype: int64

In [117]:
X_train.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
522,11.26,19.83,71.3,388.1,0.08511,0.04413,0.005067,0.005664,0.1637,0.06343,...,11.93,26.43,76.38,435.9,0.1108,0.07723,0.02533,0.02832,0.2557,0.07613
200,12.23,19.56,78.54,461.0,0.09586,0.08087,0.04187,0.04107,0.1979,0.06013,...,14.44,28.36,92.15,638.4,0.1429,0.2042,0.1377,0.108,0.2668,0.08174
24,16.65,21.38,110.0,904.6,0.1121,0.1457,0.1525,0.0917,0.1995,0.0633,...,26.46,31.56,177.0,2215.0,0.1805,0.3578,0.4695,0.2095,0.3613,0.09564
95,20.26,23.03,132.4,1264.0,0.09078,0.1313,0.1465,0.08683,0.2095,0.05649,...,24.22,31.59,156.1,1750.0,0.119,0.3539,0.4098,0.1573,0.3689,0.08368
397,12.8,17.46,83.05,508.3,0.08044,0.08895,0.0739,0.04083,0.1574,0.0575,...,13.74,21.06,90.72,591.0,0.09534,0.1812,0.1901,0.08296,0.1988,0.07053


In [118]:
# help(train_test_split)

## Model 

In [148]:
# NOTE(review): on a fresh kernel `DNN` is not bound yet at this point. The
# `from src.Model.Recommender.DNN import DNN` line near the top is commented out
# and the in-notebook class is only defined in a later cell. The recorded repr
# below shows this instance came from src.Model.Recommender.DNN, so restore that
# import (or move the class definition above this cell) for Restart & Run All.
model = DNN(hidden_dim=hidden_dim_lst)

In [149]:
model

<src.Model.Recommender.DNN.DNN at 0x7fa911384e80>

In [203]:
class DNN(Model):
    """Feed-forward stack of ``Dense`` layers with an optional sigmoid on top.

    One bias-enabled ``Dense`` layer is created per entry of ``hidden_dim``, in
    order. No ``activation`` argument is passed to those layers, so they use
    the Keras default.

    Args:
        hidden_dim: Number of units for each ``Dense`` layer, first to last.
        sigmoid: When true, a sigmoid ``Activation`` layer is appended after
            the last ``Dense`` layer.
    """

    def __init__(self, hidden_dim: List[int], sigmoid=True) -> None:
        super(DNN, self).__init__()
        # `keras.Model` does not provide a `layers_lst` attribute, so the list
        # has to be created here; appending to it without this assignment
        # raises AttributeError during construction.
        self.layers_lst = [Dense(units=dim, use_bias=True) for dim in hidden_dim]
        self.sigmoid = sigmoid
        if self.sigmoid:
            # Reached through `tf.keras.layers` because a bare `Activation`
            # name is only imported in a later cell of this notebook.
            self.layers_lst.append(
                tf.keras.layers.Activation(activation="sigmoid")
            )

    def call(self, inputs, training=False, mask=None):
        """Apply every layer in ``layers_lst`` to ``inputs`` in order.

        ``training`` and ``mask`` are accepted for signature compatibility
        but are not used by this implementation.

        Returns:
            The output of the last layer in the stack.
        """
        outputs = inputs
        for layer in self.layers_lst:
            outputs = layer(outputs)
        return outputs

In [170]:
# Compile with Adam and binary cross-entropy (from_logits left at its default),
# reporting AUC as the only metric.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.AUC()],
)
# Train for six epochs on NumPy copies of the training frame/series, with the
# last 10% of the rows held out by Keras for validation.
model.fit(
    x=np.asarray(X_train),
    y=np.asarray(y_train),
    epochs=6,
    validation_split=0.1,
)

Train on 409 samples, validate on 46 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7fa9110cec88>

In [172]:
model.evaluate(x=np.asarray(X_test), y=np.asarray(y_test), verbose=False)

[0.5717250291715589, 0.9410291]

# TF example

In [176]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [177]:
# Functional-API example: three inputs (title, body, tags) feeding two output
# heads (priority, department). Note that this rebinds `model`, replacing the
# DNN model trained earlier in the notebook.
num_tags = 12  # Number of unique issue tags
num_words = 10000  # Size of vocabulary obtained when preprocessing text data
num_departments = 4  # Number of departments for predictions

title_input = keras.Input(
    shape=(None,), name="title"
)  # Variable-length sequence of ints
body_input = keras.Input(shape=(None,), name="body")  # Variable-length sequence of ints
tags_input = keras.Input(
    shape=(num_tags,), name="tags"
)  # Binary vectors of size `num_tags`

# Embed each word in the title into a 64-dimensional vector
title_features = layers.Embedding(num_words, 64)(title_input)
# Embed each word in the body into a 64-dimensional vector
body_features = layers.Embedding(num_words, 64)(body_input)

# Reduce sequence of embedded words in the title into a single 128-dimensional vector
title_features = layers.LSTM(128)(title_features)
# Reduce sequence of embedded words in the body into a single 32-dimensional vector
body_features = layers.LSTM(32)(body_features)

# Merge all available features into a single large vector via concatenation
x = layers.concatenate([title_features, body_features, tags_input])

# Priority head: a single-unit Dense layer with no activation argument, so the
# sigmoid is left to the loss (compiled with from_logits=True further down).
priority_pred = layers.Dense(1, name="priority")(x)
# Department head: one unit per department, likewise built without an activation argument.
department_pred = layers.Dense(num_departments, name="department")(x)

# Instantiate an end-to-end model predicting both priority and department
model = keras.Model(
    inputs=[title_input, body_input, tags_input],
    outputs=[priority_pred, department_pred],
)

In [179]:
# keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True)

In [180]:
# Compile with one loss per output head. The dict keys are the `name=` values
# given to the two output Dense layers ("priority" and "department").
model.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss={
        # from_logits=True because both heads are Dense layers created
        # without an activation argument.
        "priority": keras.losses.BinaryCrossentropy(from_logits=True),
        "department": keras.losses.CategoricalCrossentropy(from_logits=True),
    },
    # Per-head weights used when the two losses are combined into the total loss.
    loss_weights={"priority": 1.0, "department": 0.2},
)

In [181]:
# Dummy input data
title_data = np.random.randint(num_words, size=(1280, 10))
body_data = np.random.randint(num_words, size=(1280, 100))
tags_data = np.random.randint(2, size=(1280, num_tags)).astype("float32")

# Dummy target data
priority_targets = np.random.random(size=(1280, 1))
dept_targets = np.random.randint(2, size=(1280, num_departments))

model.fit(
    {"title": title_data, "body": body_data, "tags": tags_data},
    {"priority": priority_targets, "department": dept_targets},
    epochs=2,
    batch_size=32,
)

Train on 1280 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7fa9166bb7f0>

In [184]:
model.evaluate(
    {"title": title_data, "body": body_data, "tags": tags_data},
    {"priority": priority_targets, "department": dept_targets},
verbose=False)

[1.2954461485147477, 0.6945294, 3.0045838]

In [191]:
priority, department = model.predict({"title": title_data, "body": body_data, "tags": tags_data},)

In [192]:
priority.shape

(1280, 1)

In [193]:
department.shape

(1280, 4)

# My own code 

In [268]:
# class DNN(DLModel):
#     def __init__(self, hidden_dim: List[int], sigmoid=True) -> None:
#         super(DNN, self).__init__()
#         for dim in hidden_dim:
#             self.layers_lst.append(
#                 Dense(units=dim, use_bias=True)
#             )
#         self.sigmoid = sigmoid
#         if self.sigmoid:
#             self.layers_lst.append(
#                 Activation(activation="sigmoid")
#             )
            
#     def call(self, inputs, training=False, mask=None):
#         print(inputs.shape)
#         for layer in self.layers_lst:
#             inputs = layer(inputs)
#         return inputs


from src.BaseClass.DLModel import DLModel
from src.Model.Recommender.DNN import DNN

from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from typing import List
import tensorflow as tf
    
class DNN_position(DLModel):
    """Two parallel ``DNN`` sub-models whose outputs are concatenated and fed
    to a final sigmoid ``Dense`` layer.

    Args:
        dnn_dims: Layer widths for the main ``DNN`` sub-model.
        position_dnn_dims: Layer widths for the position ``DNN`` sub-model.
        output_dim: Number of units in the final sigmoid layer (default 1).
    """

    def __init__(self, dnn_dims: List[int], position_dnn_dims: List[int], output_dim=1) -> None:
        super(DNN_position, self).__init__()
        # Both sub-models are built with sigmoid=False; the only sigmoid is
        # the one on the final Dense layer below.
        self.dnn = DNN(hidden_dim=dnn_dims, sigmoid=False)
        self.position_dnn = DNN(hidden_dim=position_dnn_dims, sigmoid=False)
        self.concat = tf.keras.layers.Concatenate()
        # Honour `output_dim`: it used to be accepted but ignored, with the
        # layer width fixed at 1. The default keeps the old behaviour.
        self.dense = Dense(units=output_dim, activation='sigmoid')

    def call(self, inputs, training=False, mask=None) -> tf.Tensor:
        """Run both sub-models on ``inputs``, concatenate, and apply the head.

        ``training`` and ``mask`` are accepted but not used explicitly here.

        Returns:
            The output of the final sigmoid ``Dense`` layer.
        """
        # NOTE(review): both sub-models receive the same full `inputs`. If the
        # position sub-model is meant to see only position features, that split
        # is not implemented here — confirm the intent.
        dnn_layer = self.dnn(inputs)
        dnn_position_layer = self.position_dnn(inputs)
        final_layer = self.concat([dnn_layer, dnn_position_layer])
        output = self.dense(final_layer)
        return output

In [269]:
hidden_dim_lst

[96, 24, 8]

In [270]:
model = DNN_position(dnn_dims=hidden_dim_lst, position_dnn_dims=position_dnn_dims)

In [271]:
# Adam optimizer, binary cross-entropy loss (from_logits left at its default),
# and AUC as the single reported metric.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.AUC()],
)

In [272]:
# Six epochs over the whole training split, with no validation hold-out.
# The training frame/series are converted to NumPy arrays before being passed in.
model.fit(x=np.asarray(X_train), y=np.asarray(y_train), epochs=6)

    def call(self, inputs, training=False, mask=None) -> tf.Tensor:
        dnn_layer = self.dnn(inputs)
        dnn_position_layer = self.position_dnn(inputs)
#         if training:
        final_layer = self.concat([dnn_layer, dnn_position_layer])
        output = self.dense(final_layer)
        return output

This may be caused by multiline strings or comments not indented at the same level as the code.
    def call(self, inputs, training=False, mask=None) -> tf.Tensor:
        dnn_layer = self.dnn(inputs)
        dnn_position_layer = self.position_dnn(inputs)
#         if training:
        final_layer = self.concat([dnn_layer, dnn_position_layer])
        output = self.dense(final_layer)
        return output

This may be caused by multiline strings or comments not indented at the same level as the code.
Train on 455 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7fa8eeb23d68>

In [273]:
model.summary()

Model: "dnn_position_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dnn_29 (DNN)                 multiple                  5504      
_________________________________________________________________
dnn_30 (DNN)                 multiple                  819       
_________________________________________________________________
concatenate_5 (Concatenate)  multiple                  0         
_________________________________________________________________
dense_84 (Dense)             multiple                  12        
Total params: 6,335
Trainable params: 6,335
Non-trainable params: 0
_________________________________________________________________


In [276]:
# Draw the current `model` (the DNN_position instance) to a PNG. The file name
# used to be copied from the multi-input example and mislabelled this model.
# The recorded output below shows this call needs pydot and graphviz installed.
keras.utils.plot_model(model, "dnn_position_model.png", show_shapes=True)

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [278]:
model.evaluate(x=np.asarray(X_test), y=np.asarray(y_test), verbose=False)

[0.9218728791155419, 0.9074508]