In [1]:
#  breakhistf2v026-nsl-v4-02.ipynb  v1.0.0 
# 
#  --------------------------------------------------
#  Hangzhou Domain Zones Technology Co., Ltd

#  Apache License 2.0       https://www.apache.org/licenses/LICENSE-2.0
#  --------------------------------------------------


import os
import tensorflow as tf
import numpy as np
import csv
import matplotlib.pyplot as plt
from tensorflow import keras
import neural_structured_learning as nsl
from tensorflow.python.keras.api._v2.keras import layers, optimizers, losses
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import TensorBoard
import random


# Seed TensorFlow's and NumPy's global random generators. Python's own
# `random` module (used by shuffletwolists) is not seeded in the visible code.
tf.random.set_seed(22)
np.random.seed(22)

# NOTE(review): this runs after `import tensorflow`; the native log level is
# presumably read when TensorFlow is imported, so this assignment may need to
# move above the import to take effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Fail fast when the installed TensorFlow is not a 2.x release.
assert tf.__version__.startswith('2.')

# Same strings as the dictionary keys built by convert_to_dictionaries().
IMAGE_INPUT_NAME = 'images'
LABEL_INPUT_NAME = 'labels'
  
# IPython/Jupyter magic (not plain Python): render matplotlib figures inline.
%matplotlib inline
 
def read_csv(csvnamepath, filename, is2=False):
    """Read image paths and labels from a three-column CSV file.

    Every non-empty row must hold exactly ``image_path, label, cancer_name``.

    Args:
        csvnamepath: Directory that contains the CSV file.
        filename: Name of the CSV file inside ``csvnamepath``.
        is2: When true, the numeric label column is ignored and a binary
            label is derived from the cancer name instead: 0 for the benign
            sub-types listed below, 1 for everything else. When false, the
            label column is parsed with ``int``.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the image
        path strings and the integer labels, in file order.

    Raises:
        ValueError: If a non-empty row does not have exactly three fields,
            or (with ``is2`` false) its label is not an integer literal.
    """
    benigns = frozenset(
        ("adenosis", "fibroadenoma", "tubular_adenoma", "phyllodes_tumor")
    )
    images, labels = [], []
    # newline='' hands newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(os.path.join(csvnamepath, filename), newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines (e.g. a trailing newline at end of file) come
                # back as empty rows; skip them instead of failing to unpack.
                continue
            img, label, cancername = row
            if is2:
                label = 0 if cancername in benigns else 1
            else:
                label = int(label)
            images.append(img)
            labels.append(label)

    return images, labels

def load_breakhis(dirc, filename, is2=False, mode='train'):
    """Return one split (train/val/test) of the BreakHis image list.

    Deliberately a plain Python function rather than a ``tf.function``: it
    only does directory listing and CSV parsing on Python objects. Tracing it
    would run that I/O only at trace time and turn every returned path and
    label into a graph constant.

    Args:
        dirc: Directory whose immediate sub-directories are taken as class
            names for ``classdir2label``.
        filename: CSV file name handed to ``read_csv``.
        is2: Forwarded to ``read_csv`` (binary labels when true).
        mode: ``'train'`` selects the first 60% of the rows, ``'val'`` the
            next 20% (60%-80%), and any other value the last 20%.

    Returns:
        ``(images, labels, classdir2label)``: two Python lists holding the
        selected image paths and integer labels, and a dict mapping each
        sub-directory name of ``dirc`` to a running index in
        ``os.listdir`` order.
    """
    classdir2label = {}
    for filedir in os.listdir(dirc):
        if os.path.isdir(os.path.join(dirc, filedir)):
            classdir2label[filedir] = len(classdir2label)

    # NOTE(review): the CSV is always read from ./tf2breakhis relative to the
    # current working directory, regardless of `dirc` -- confirm intended.
    images, labels = read_csv(
        os.path.join(os.path.abspath('.'), 'tf2breakhis'), filename, is2)

    # read_csv returns equally long lists, so one pair of bounds serves both.
    total = len(images)
    train_end = int(0.6 * total)
    val_end = int(0.8 * total)
    if mode == 'train':  # 60%
        split = slice(None, train_end)
    elif mode == 'val':  # 20% = 60%->80%
        split = slice(train_end, val_end)
    else:  # 20% = 80%->100%
        split = slice(val_end, None)
    return images[split], labels[split], classdir2label
 
# Per-channel mean and standard deviation used as the defaults of
# normalize() and denormalize().
# Presumably the commonly used ImageNet RGB statistics for pixel values
# scaled to [0, 1] -- TODO confirm.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])

@tf.function
def normalize(x, mean=img_mean, std=img_std):
    """Standardize an image tensor channel-wise as ``(x - mean) / std``.

    ``mean`` and ``std`` default to the module-level 3-element constants.
    They are combined with ``x`` through broadcasting, which aligns shapes
    from the right: for a ``[224, 224, 3]`` image a ``[3]`` vector is first
    treated as ``[1, 1, 3]`` and then stretched to ``[224, 224, 3]``.
    """
    centered = x - mean
    return centered / std

# Inverse of normalize(): data that has been normalized has to be mapped
# back, for example before it is visualized.
def denormalize(x, mean=img_mean, std=img_std):
    """Undo ``normalize`` by returning ``x * std + mean``."""
    return x * std + mean

  
# @tf.function 
def preprocess_train_nsl(images,labels):
    """Load and augment training images, producing two samples per file.

    Each file is read, decoded as a 3-channel JPEG and resized to 244x244.
    Two variants are then produced: a random 224x224x3 crop of the resized
    image, and a random 224x224x3 crop of a randomly left-right flipped copy.
    Both are divided by 255 and passed through ``normalize``.

    The TensorFlow version and the number of input images are printed once
    before processing starts.

    Args:
        images: Sequence of image file paths.
        labels: Sequence of labels indexed in parallel with ``images``.

    Returns:
        ``(images1, labels1)``: two Python lists, twice as long as
        ``images``. For every input, the plain crop comes first and the
        flipped-copy crop second, both paired with the same label tensor.
    """
    def _scale_and_normalize(pixels):
        # Divide by 255, then standardize per channel.
        pixels = tf.cast(pixels, dtype=tf.float32) / 255.
        return tf.convert_to_tensor(normalize(pixels))

    sample_count = len(images)
    print (tf.__version__)
    print (sample_count)

    out_images = []
    out_labels = []
    for idx, path in enumerate(images):
        target = labels[idx]
        raw_bytes = tf.io.read_file(path)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
        resized = tf.image.resize(decoded, [244, 244])

        # Keep the random ops in this exact order (flip, crop, crop) so a
        # seeded run draws the same random values as before.
        flipped = tf.image.random_flip_left_right(resized)
        plain_crop = tf.image.random_crop(resized, [224, 224, 3])
        flipped_crop = tf.image.random_crop(flipped, [224, 224, 3])

        plain_sample = _scale_and_normalize(plain_crop)
        flipped_sample = _scale_and_normalize(flipped_crop)
        label_tensor = tf.convert_to_tensor(target)

        out_images.extend((plain_sample, flipped_sample))
        out_labels.extend((label_tensor, label_tensor))
    return out_images, out_labels


def preprocessnsl(images,labels):
    """Load images and turn them into normalized 224x224x3 tensors.

    Each file is read, decoded as a 3-channel JPEG, resized to 244x244 and
    randomly cropped to 224x224x3. The crop is divided by 255 and passed
    through ``normalize``. No flipping is applied.

    The TensorFlow version and the number of input images are printed once
    before processing starts.

    Args:
        images: Sequence of image file paths.
        labels: Sequence of labels indexed in parallel with ``images``.

    Returns:
        ``(images1, labels1)``: two Python lists of tensors with one entry
        per input image, in input order.
    """
    sample_count = len(images)
    print (tf.__version__)
    print (sample_count)

    out_images = []
    out_labels = []
    for idx, path in enumerate(images):
        target = labels[idx]
        raw_bytes = tf.io.read_file(path)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
        resized = tf.image.resize(decoded, [244, 244])
        cropped = tf.image.random_crop(resized, [224, 224, 3])

        # Divide by 255, then standardize per channel.
        scaled = tf.cast(cropped, dtype=tf.float32) / 255.
        sample = tf.convert_to_tensor(normalize(scaled))

        out_images.append(sample)
        out_labels.append(tf.convert_to_tensor(target))
    return out_images, out_labels




def convert_to_tensor(images,labels):
    """Convert ``images`` and ``labels`` with ``tf.convert_to_tensor``.

    Returns:
        ``(image_tensor, label_tensor)``, converted in that order.
    """
    return tf.convert_to_tensor(images), tf.convert_to_tensor(labels)

 
def shuffletwolists(list1,list2,shufflesize):
    """Shuffle two parallel sequences with one shared random permutation.

    The sequences are zipped into pairs, so element ``i`` of ``list1`` stays
    matched with element ``i`` of ``list2``. If the lengths differ, the
    surplus of the longer sequence is dropped by ``zip``.

    Args:
        list1: First sequence.
        list2: Second sequence, parallel to ``list1``.
        shufflesize: Controls how many times ``random.shuffle`` is applied:
            ``shufflesize + 1`` passes for non-negative integers, none for
            negative values. One pass already yields a random permutation;
            the count is kept as-is so runs that seed ``random`` reproduce.

    Returns:
        ``(shuffled1, shuffled2)`` as two new lists. Empty input yields
        ``([], [])``.
    """
    pairs = list(zip(list1, list2))
    if not pairs:
        # zip(*[]) produces nothing to unpack into two names, so empty input
        # must be answered before the unzip step below.
        return [], []

    # Same bound as the historical loop (`i > shufflesize` stops it), written
    # as a comparison so any numeric shufflesize keeps working.
    passes = 0
    while passes <= shufflesize:
        random.shuffle(pairs)
        passes += 1

    shuffled1, shuffled2 = zip(*pairs)
    return list(shuffled1), list(shuffled2)



@tf.function
def convert_to_dictionaries(image, label):
    """Pack an image and its label into a dict keyed 'images' / 'labels'."""
    packed = {'images': image, 'labels': label}
    return packed

 
  
  

# Batch size shared by the training, validation and test datasets.
batch_size = 32
 
    
# Training split: the first 60% of the rows in the CSV (see load_breakhis),
# with the 8-class integer labels (is2=False).
images, labels, table = load_breakhis(os.path.join(os.path.abspath('.'),'tf2breakhis'),  "tf2breakhisnewCSV",False,'train')


# Shuffle paths and labels together before any image is loaded.
(images , labels)  =shuffletwolists( images ,labels ,1000)  

# Decode and augment; every input image yields two tensors (a crop and a crop
# of a randomly flipped copy), all held in memory as Python lists.
(x_train , y_train)  = preprocess_train_nsl(images ,labels ) 

 
# Shuffle again so the two variants of one image are no longer adjacent.
(x_train , y_train)  =shuffletwolists( x_train ,y_train ,1000)

  
# Validation split: rows 60%-80% of the CSV (see load_breakhis).
images2, labels2, table = load_breakhis(os.path.join(os.path.abspath('.'),'tf2breakhis'),  "tf2breakhisnewCSV",False,'val')
# Keras expects an integer number of validation steps. Ceil division counts
# the final, partially filled batch, matching the number of batches that
# .batch(batch_size) produces for this many samples.
val_steps = -(-len(images2) // batch_size)

# Decode and normalize the validation images (one tensor per image).
(x_val , y_val)  = preprocessnsl(images2 ,labels2 )

(x_val , y_val)  =shuffletwolists( x_val ,y_val ,1000 )
 

# Each dataset element is a dict with the image under 'feature' (the same name
# as the model's Input layer) and the integer label under 'label'. The
# datasets are batched but not shuffled or repeated here.
db_train = tf.data.Dataset.from_tensor_slices({'feature':x_train, 'label':y_train}).batch(batch_size)
 
db_val =  tf.data.Dataset.from_tensor_slices({'feature':x_val, 'label':y_val}).batch(batch_size)

  

# Base classifier: two strided conv + max-pool stages followed by a dense
# head with 8 softmax outputs. Despite the name, the stack below contains no
# residual connections.
# Shape comments assume Keras' default 'valid' padding -- confirm.
resnetdense = keras.Sequential([

    # Input is named 'feature' to match the key used in the dataset dicts.
    tf.keras.Input((224, 224,3), name='feature'),
    #tf.keras.Input((460, 700,3), name='feature'),
    
    #tf.keras.Input((456, 692,3), name='feature'),
 
    
    # 16 filters, 5x5 kernel, stride 3: 224x224x3 -> 74x74x16
    tf.keras.layers.Conv2D(16,5,3 , activation=tf.nn.relu),
    # 3x3 pool, stride 3: -> 24x24x16
    tf.keras.layers.MaxPool2D(3,3),
    #tf.keras.layers.ReLU(),
    # 64 filters, 5x5 kernel, stride 3: -> 7x7x64
    tf.keras.layers.Conv2D(64,5,3, activation=tf.nn.relu),
    # 2x2 pool, stride 2: -> 3x3x64
    tf.keras.layers.MaxPool2D(2,2),
    #tf.keras.layers.ReLU(),
    
    # -> 576 features
    tf.keras.layers.Flatten(),
   
    #tf.keras.layers.Dense(128), 
    
    tf.keras.layers.Dense(128,activation=tf.nn.sigmoid ), 
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(64, activation=tf.nn.relu),  
    #tf.keras.layers.ReLU(),
    #tf.keras.layers.Dense(8)
    # One probability per class (8 classes); pairs with the
    # sparse_categorical_crossentropy loss used at compile time.
    tf.keras.layers.Dense(8, activation=tf.nn.softmax)
   ])


 
  

# Log directory name; not referenced anywhere else in the visible code.
LOGDIR='log/breakhistf2v026-nsl-v4-02' 
  
# Norm used when generating the adversarial perturbation.
adv_grad_norm = 'infinity'

# Adversarial regularization settings (Neural Structured Learning).
# Presumably: multiplier weights the adversarial loss against the supervised
# loss and adv_step_size sets the perturbation magnitude -- confirm against
# the NSL documentation.
adv_config = nsl.configs.make_adv_reg_config(multiplier=0.2, adv_step_size=0.05,adv_grad_norm = adv_grad_norm)
# Wrap the base model. The wrapper is fed dicts that carry both the input
# ('feature') and the label ('label'); no label key is passed explicitly, so
# 'label' presumably matches the wrapper's default -- confirm.
adv_model = nsl.keras.AdversarialRegularization(resnetdense, adv_config=adv_config)
 
# Integer class labels with softmax outputs -> sparse categorical cross-entropy.
adv_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
  

# Train for 100 epochs, validating on db_val for val_steps batches.
adv_model.fit(db_train,  validation_data=  db_val , validation_steps = val_steps, epochs=100, verbose =1)

# Test split: the last 20% of the rows in the CSV (any mode other than
# 'train'/'val' selects it in load_breakhis).
images3, labels3, table = load_breakhis(os.path.join(os.path.abspath('.'),'tf2breakhis'),  "tf2breakhisnewCSV",False,'test')


# NOTE(review): preprocessnsl applies a random crop, so evaluation inputs are
# not deterministic between runs -- confirm this is intended.
(x_test , y_test)  = preprocessnsl(images3 ,labels3 ) 
db_test =  tf.data.Dataset.from_tensor_slices({'feature':x_test, 'label':y_test}).batch(batch_size)
 
# Evaluate the adversarially regularized model on the held-out test batches.
adv_model.evaluate( db_test)
 
  

 
 
 
 

2.0.0
4745
2.0.0
1582
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100


Epoch 100/100
2.0.0
1582


[1.4504812788963317, 0.95174, 0.6946903, 2.493707]