In [None]:
# Fresh installation of the Python libraries

In [None]:
%%bash
# Install / upgrade the libraries this notebook depends on.
# NOTE(review): the notebook imports from tensorflow.contrib, which TensorFlow 2.x
# no longer ships, so an unpinned upgrade may break those imports. Consider pinning
# a 1.x release (exact version to be confirmed).
pip install --upgrade tensorflow
# The PyPI distribution is called "scikit-learn"; "sklearn" is only a deprecated
# placeholder package, so install the real one directly.
pip install --upgrade scikit-learn

# If scikit-learn still does not work, uninstall and reinstall it:
#pip uninstall -y scikit-learn
#pip install scikit-learn

# Check the installed packages with:
# pip freeze



In [1]:
# Example: importing a CSV file from GCS


import pandas as pd
from StringIO import StringIO
from sklearn.model_selection import train_test_split
from __future__ import print_function

# Read csv file from GCS into a variable
%storage read --object gs://analiticauniversal/DatasetsTF/creditcards.csv --variable creditcards

# Store in a pandas dataframe
df = pd.read_csv(StringIO(creditcards))
dataset = df.as_matrix()


# Every column except the last is used as model input; the last column is the target.
features = dataset[:, :-1]
labels = dataset[:, -1]

# Step 1: hold out 10% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.1, random_state=1)

# Step 2: hold out 10% of the remaining rows as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=1)

In [3]:
import numpy as np
import tensorflow
import tensorflow as tf
import sys


In [4]:
from tensorflow.contrib.keras.python.keras.regularizers import l1,l2
from tensorflow.contrib.keras.python.keras.models import Sequential, load_model
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.python.keras.constraints import max_norm
from tensorflow.contrib.keras.python.keras.optimizers import RMSprop, Adam
from tensorflow.contrib.keras.python.keras.layers.normalization import BatchNormalization
from tensorflow.contrib.keras.python.keras.callbacks import CSVLogger, TensorBoard, ModelCheckpoint, EarlyStopping
from sklearn.metrics import roc_auc_score
from datetime import datetime
from os.path import abspath
import os


# Raise TensorFlow's native logging threshold so info/warning messages are hidden.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Timestamp embedded in every path below so that each run gets its own directories.
NOW = datetime.now().strftime("%Y-%m-%d--%Hh%Mm%Ss")

# Remote (GCS) destination for this run's logs and results.
ROOT_LOGDIR = 'gs://analiticauniversal/LogsTF'
LOG_DIR = '{}/run-{}'.format(ROOT_LOGDIR, NOW)
OUTPUT_FILE = LOG_DIR + '/results.txt'

# Local scratch directory and the files written into it during training.
# NOTE(review): LOCAL_DIR sits directly under the filesystem root ("/run-...").
LOCAL_DIR = '/run-{}'.format(NOW)
CSV_LOG = LOCAL_DIR + '/training.log'
CKPT = LOCAL_DIR + '/ckpt.hdf5'
MODEL = LOCAL_DIR + '/model.h5'

# Start each run from empty directories: wipe any leftover, then (re)create.
for run_dir in (LOG_DIR, LOCAL_DIR):
    if tf.gfile.Exists(run_dir):
        tf.gfile.DeleteRecursively(run_dir)
    tf.gfile.MakeDirs(run_dir)



In [5]:
# ---- Training hyperparameters -------------------------------------------
batch_size = 500      # passed to model.fit as the mini-batch size
epochs = 10           # passed to model.fit as the number of training epochs
dropout_rate = 0.5    # passed to the Dropout layer

# ---- Early stopping (increase these when using AUC scores) --------------
DELTA = 1e-6          # passed to EarlyStopping as min_delta
PATIENCE = 200        # passed to EarlyStopping as patience
# NOTE(review): PATIENCE (200) is larger than epochs (10), so with these values
# early stopping presumably never triggers -- confirm this is intended.

# ---- AUC callback interval ----------------------------------------------
# Not referenced by any cell visible in this notebook.
AUCS_INTERVAL = 1

In [6]:
# Callbacks handed to model.fit():
#   csv_logger     -- writes the training log to the local CSV_LOG file
#   early_stopping -- configured with the DELTA / PATIENCE constants
csv_logger = CSVLogger(filename=CSV_LOG)
early_stopping = EarlyStopping(
    min_delta=DELTA,
    patience=PATIENCE,
)
# Checkpointing is currently disabled:
#ckpt = ModelCheckpoint(filepath = CKPT, save_best_only = True)

In [8]:

# Lower-case aliases for the feature matrices used by the model code in this cell.
# The label arrays already have lower-case names (y_train, y_val, y_test), so the
# former `y_train = y_train`-style self-assignments were no-ops and are dropped.
x_train = X_train
x_val = X_val
x_test = X_test

# Number of input features: all columns of `dataset` except the last one,
# which is split off as the target.
input_dim = dataset.shape[1] - 1
# Width of the softmax output layer.
num_classes = 2

# Network: Dense(5) -> BatchNormalization -> ELU -> Dropout -> Dense softmax.
# Alternative first layer kept for reference (currently disabled):
#   Dense(5, input_shape=(input_dim,), init='he_normal')
model = Sequential([
    Dense(5, input_shape=(input_dim,)),
    BatchNormalization(),
    Activation('elu'),
    Dropout(dropout_rate),
    Dense(num_classes, activation='softmax'),
])

# Print the layer-by-layer overview before compiling.
model.summary()

# Sparse categorical cross-entropy loss, Adam with its default settings,
# and accuracy reported as the only metric.
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


# Train on the training split, validating on the validation split each epoch.
# The CSV logger and early-stopping callbacks are attached here.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_val, y_val),
                    callbacks=[csv_logger, early_stopping])

# score[0] is the loss, score[1] the accuracy metric requested in compile().
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1], "\n")

# The softmax output layer already yields per-class probabilities, so plain
# predict() is used instead of predict_proba(), a deprecated alias that later
# Keras releases removed.
y_pred = model.predict(x_test, verbose=0)
# Column 1 holds the probability of class 1, which is scored for the ROC AUC.
y_score = y_pred[:, 1]
auc = roc_auc_score(y_true=y_test, y_score=y_score)
# Express the AUC as a percentage.
auc *= 100
print("Test AUC:", auc)





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 5)                 155       
_________________________________________________________________
batch_normalization_1 (Batch (None, 5)                 20        
_________________________________________________________________
activation_1 (Activation)    (None, 5)                 0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 12        
Total params: 187.0
Trainable params: 177.0
Non-trainable params: 10.0
_________________________________________________________________
Train on 230692 samples, validate on 25633 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/1

In [9]:
# Write a plain-text report of this run (network structure, hyperparameters,
# model summary and test metrics) to OUTPUT_FILE.
#
# stdout is redirected temporarily because model.summary() prints directly to it.
# Compared with the previous version of this cell:
#   * the stream that was active before the redirect is saved and restored; inside
#     a notebook kernel that stream is not sys.__stdout__, so restoring
#     sys.__stdout__ would detach later prints from the notebook output;
#   * the restore happens in `finally`, so a failing statement cannot leave stdout
#     pointing at the report file;
#   * the report file is closed (by the `with` block) so its content is flushed.
json_string = model.to_json()
notebook_stdout = sys.stdout
with tf.gfile.Open(name=OUTPUT_FILE, mode='w') as report_file:
    sys.stdout = report_file
    try:
        print("Network structure (json format)", "\n")
        print(json_string, "\n")
        print("Hyperparameters", "\n")
        print("Batch size:", batch_size)
        print("Epochs:", epochs)
        print("Dropout rate:", dropout_rate, "\n")
        model.summary()
        # Disabled: per-epoch validation AUC listing. `ival` is not defined in
        # any cell visible in this notebook.
        # print("Validation AUCs during training", "\n")
        # for i in range(len(ival.aucs)):
        #     print("\t","Epoch", str(i), "- val_auc:", ival.aucs[i], " - loss:", ival.losses[i])
        print('\n','Test loss:', score[0])
        print('Test accuracy:', score[1]*100, '\n')
        print('Test AUC:', auc)
    finally:
        sys.stdout = notebook_stdout

In [10]:
# Copy the local training log into this run's remote log directory.
remote_training_log = LOG_DIR + '/training.log'
tf.gfile.Copy(CSV_LOG, remote_training_log)
# Checkpoint upload is currently disabled:
#tf.gfile.Copy(oldpath=CKPT , newpath=LOG_DIR + '/ckpt.hdf5' )



In [11]:
# Remove the local scratch directory of this run.
# The former shell line `rm -rf LOCAL_DIR` passed the literal text "LOCAL_DIR" to
# the shell, so the directory stored in the Python variable was never deleted.
# The same tf.gfile calls that created the directory are used here instead.
if tf.gfile.Exists(LOCAL_DIR):
    tf.gfile.DeleteRecursively(LOCAL_DIR)