In [None]:
# Instalación desde cero de librerías de Python

In [None]:
%%bash
# Install / upgrade the Python libraries this notebook depends on.
# NOTE(review): later cells import `tensorflow.contrib.keras`; an unpinned
# upgrade may pull a TensorFlow release that no longer ships `contrib` --
# consider pinning the version this notebook was written against (TODO confirm).
pip install --upgrade tensorflow
# The PyPI distribution is named `scikit-learn`; `sklearn` is only a deprecated
# alias on PyPI and should not be used with pip.
pip install --upgrade scikit-learn

# If scikit-learn still does not work, uninstall it and install it again:
#pip uninstall scikit-learn
#pip install scikit-learn

# Check the installed packages with:
# !pip freeze



In [1]:
# Ejemplo de importar csv desde GCS


import pandas as pd
from StringIO import StringIO
from sklearn.model_selection import train_test_split
from __future__ import print_function, division

# Read csv file from GCS into a variable
%storage read --object gs://analiticauniversal/DatasetsTF/creditcards.csv --variable creditcards


# Store in a pandas dataframe

df = pd.read_csv(StringIO(creditcards))
dataset = df.as_matrix().astype(float)

TRAIN_SIZE = 0.8
VAL_SIZE = 0.1
TEST_SIZE = 0.1

X_train, X_test, y_train, y_test = train_test_split(dataset[:,:-1], dataset[:,-1], test_size=TEST_SIZE)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=VAL_SIZE/TRAIN_SIZE)

In [18]:
# (rows, feature columns) of the training feature matrix.
X_train.shape

(224284, 30)

In [19]:
# (rows, feature columns) of the validation feature matrix.
X_val.shape

(32041, 30)

In [21]:
# (rows, feature columns) of the test feature matrix.
X_test.shape

(28481, 30)

In [2]:
import numpy as np
import tensorflow
import tensorflow as tf
import sys


In [3]:
from tensorflow.contrib.keras.python.keras.regularizers import l1,l2
from tensorflow.contrib.keras.python.keras.models import Sequential, load_model
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.python.keras.constraints import max_norm
from tensorflow.contrib.keras.python.keras.optimizers import RMSprop, Adam
from tensorflow.contrib.keras.python.keras.layers.normalization import BatchNormalization
from tensorflow.contrib.keras.python.keras.callbacks import CSVLogger, TensorBoard, EarlyStopping
from sklearn.metrics import roc_auc_score
from datetime import datetime
from os.path import abspath
import os


# Quiet TensorFlow's native logging: per the original note, this setting
# disables the info warnings emitted by TF.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"


In [4]:
# Create the per-run directory hierarchy, both in Cloud Storage and on the
# local (Compute Engine) disk.
# TODO (original author's note): prefer os.path.join over '/' concatenation.

# Timestamp that makes every run directory unique.
NOW = datetime.now().strftime("%Y-%m-%d--%Hh%Mm%Ss")

# Remote (GCS) location of this run's logs and of the text report.
ROOT_LOGDIR = 'gs://analiticauniversal/LogsTF'
LOG_DIR = ROOT_LOGDIR + '/run-' + NOW
OUTPUT_FILE = '{}/results.txt'.format(LOG_DIR)

# Local location where the Keras callbacks write during training.
# Open question from the original author: why is this path not visible from
# the ssh console?
ROOT_DATALAB = '/mnt/disks/datalab-pd/content/datalab/docs/analiticauniversal/NotebooksTF/Logs'
LOCAL_DIR = ROOT_DATALAB + '/run-' + NOW
CSV_LOG = '{}/training.log'.format(LOCAL_DIR)

# Start from empty directories: wipe any existing one, then (re)create it.
# The remote directory is handled first, then the local one.
for fresh_dir in (LOG_DIR, LOCAL_DIR):
    if tf.gfile.Exists(fresh_dir):
        tf.gfile.DeleteRecursively(fresh_dir)
    tf.gfile.MakeDirs(fresh_dir)


In [5]:
# Early-stopping thresholds. The original note recommends increasing them
# when AUC scores are used.
PATIENCE = 20
DELTA = 1e-6

# Training hyperparameters.
dropout_rate = 0.5
epochs = 50
batch_size = 500

In [6]:
# Keras callbacks handed to model.fit().

# TensorBoard logs go to the local run directory, with histograms every epoch
# and the graph included, but no images.
tb = TensorBoard(
    log_dir=LOCAL_DIR,
    histogram_freq=1,
    write_graph=True,
    write_images=False,
)

# Early stopping driven by the DELTA / PATIENCE constants.
early_stopping = EarlyStopping(min_delta=DELTA, patience=PATIENCE)

# Training log written to the CSV_LOG path.
csv_logger = CSVLogger(CSV_LOG)

In [9]:
# Network definition: one hidden ELU layer followed by a softmax output.
# Every column of `dataset` except the last one is an input feature
# (30 input variables according to the original note).
num_classes = 2
input_dim = dataset.shape[1] - 1

model = Sequential([
    Dense(10, input_shape=(input_dim,), kernel_initializer='he_normal'),
    # BatchNormalization(),      # currently disabled
    Activation('elu'),
    # Dropout(dropout_rate),     # currently disabled
    Dense(num_classes, activation='softmax'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 93        
_________________________________________________________________
batch_normalization_2 (Batch (None, 3)                 12        
_________________________________________________________________
activation_2 (Activation)    (None, 3)                 0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 3)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 8         
Total params: 113.0
Trainable params: 107.0
Non-trainable params: 6.0
_________________________________________________________________


In [8]:
# Train the model, then evaluate it on the held-out test set.

# Labels are passed as a single column of class ids, hence the *sparse*
# categorical cross-entropy against the softmax output.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])


# Fit on the training split, monitoring the validation split; the callbacks
# log to CSV / TensorBoard and may stop training early.
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_val, y_val),
                    callbacks=[csv_logger, early_stopping, tb])


# score[0] is the loss, score[1] the accuracy metric requested in compile().
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1], "\n")


# The softmax output already holds the class probabilities, so plain
# `predict` is used instead of the deprecated `predict_proba` wrapper.
y_pred = model.predict(X_test, verbose=0)
# Column 1 is used as the score of the positive class
# (assumes labels are encoded as 0/1 -- TODO confirm).
y_score = y_pred[:, 1]
auc = roc_auc_score(y_true=y_test, y_score=y_score)
# Report the AUC as a percentage.
auc *= 100
print("Test AUC:", auc)





Train on 224284 samples, validate on 32041 samples
INFO:tensorflow:Summary name dense_1/kernel:0 is illegal; using dense_1/kernel_0 instead.
INFO:tensorflow:Summary name dense_1/bias:0 is illegal; using dense_1/bias_0 instead.
INFO:tensorflow:Summary name batch_normalization_1/gamma:0 is illegal; using batch_normalization_1/gamma_0 instead.
INFO:tensorflow:Summary name batch_normalization_1/beta:0 is illegal; using batch_normalization_1/beta_0 instead.
INFO:tensorflow:Summary name batch_normalization_1/moving_mean:0 is illegal; using batch_normalization_1/moving_mean_0 instead.
INFO:tensorflow:Summary name batch_normalization_1/moving_variance:0 is illegal; using batch_normalization_1/moving_variance_0 instead.
INFO:tensorflow:Summary name dense_2/kernel:0 is illegal; using dense_2/kernel_0 instead.
INFO:tensorflow:Summary name dense_2/bias:0 is illegal; using dense_2/bias_0 instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9

In [10]:
# Write a plain-text report of this run (network structure, hyperparameters
# and test metrics) to OUTPUT_FILE by temporarily redirecting stdout, so that
# the output of model.summary() is captured as well.

# Remember the stream that is active right now. Inside a notebook this is the
# kernel's own stream, not sys.__stdout__, so restoring sys.__stdout__ would
# detach later print() calls from the cell output.
notebook_stdout = sys.stdout

# The context manager guarantees the file is closed (and therefore flushed)
# even if one of the statements below fails.
with tf.gfile.Open(name=OUTPUT_FILE, mode='w') as results_file:
    sys.stdout = results_file
    try:
        json_string = model.to_json()
        print("Network structure (json format)", "\n")
        print(json_string, "\n")
        print("Hyperparameters", "\n")
        print("Batch size:", batch_size)
        print("Epochs:", epochs)
        print("Dropout rate:", dropout_rate, "\n")
        model.summary()
        print('\n', 'Test loss:', score[0])
        print('Test accuracy:', score[1]*100, '\n')
        print('Test AUC:', auc)
    finally:
        # Always hand stdout back, before the file gets closed.
        sys.stdout = notebook_stdout

In [11]:
# Copy the Keras outputs from the local run directory to the GCS run directory.
# TODO (original author's note): prefer os.path.join over '/' concatenation.
for entry in tf.gfile.ListDirectory(LOCAL_DIR):
    local_path = '{}/{}'.format(LOCAL_DIR, entry)
    remote_path = '{}/{}'.format(LOG_DIR, entry)
    tf.gfile.Copy(oldpath=local_path, newpath=remote_path)

In [16]:
# Recursively delete every entry found under the local logs root.
for entry in tf.gfile.ListDirectory(ROOT_DATALAB):
    stale_path = '{}/{}'.format(ROOT_DATALAB, entry)
    tf.gfile.DeleteRecursively(stale_path)

In [None]:
# List the entries currently present under the local logs root.
tf.gfile.ListDirectory(ROOT_DATALAB)