In [0]:
!pip install kaggle
import json
import pandas as pd

api_token = {"username":"twoface262","key":"453e89deca1ef616f15f5725eed93000"}

with open('kaggle.json', 'w') as kaggle_json:
  json.dump(api_token, kaggle_json)
  
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

!kaggle competitions download -c human-protein-atlas-image-classification
!ls

Downloading sample_submission.csv to /content
  0% 0.00/446k [00:00<?, ?B/s]
100% 446k/446k [00:00<00:00, 67.8MB/s]
Downloading train.csv to /content
  0% 0.00/1.22M [00:00<?, ?B/s]
100% 1.22M/1.22M [00:00<00:00, 82.1MB/s]
Downloading test.zip to /content
100% 4.37G/4.37G [01:53<00:00, 8.18MB/s]

Downloading train.zip to /content
100% 13.1G/13.1G [07:45<00:00, 23.6MB/s]

kaggle.json  sample_data  sample_submission.csv  test.zip  train.csv  train.zip


In [0]:
!mkdir data_train
!unzip train.zip -d data_train

In [0]:
# List the contents of the current working directory.
!ls

data_train   sample_data	    test.zip  train.csv
kaggle.json  sample_submission.csv  train


In [0]:

class UNET_DATA:
	''' Holds image/label data for the UNET and serves it in fixed-size batches.

		Data comes from one of two sources:
			- in-memory arrays passed as labels_arr / image_arr, or
			- a labels CSV plus an image folder (csv_path / images_path), which are
			  loaded into self.data_dictionary keyed by image id.
	'''
	def __init__(self, labels_arr=None, image_arr=None, csv_path=None, images_path=None, max_images=2):
		''' Sets up the batching state and optionally loads images listed in a CSV

			Input:
				- labels_arr: list -- labels, index-aligned with image_arr (optional)
				- image_arr: list -- images, index-aligned with labels_arr (optional)
				- csv_path: String -- path to the labels CSV (optional)
				- images_path: String -- folder containing the image files (optional)
				- max_images: int or None -- forwarded to fetch_images_with_csv; caps how
				  many CSV rows are loaded (None loads every row)
		'''
		self.current_batch = 0
		self.batch_size = 0
		self.total_batches = 0
		self.labels = labels_arr
		self.images = image_arr
		# Always defined so the batching helpers can rely on it.
		self.data_keys = []

		# The CSV source is only used when both paths are supplied.
		if csv_path and images_path:
			print("Found CSV")
			data_frame = self.open_csv(csv_path)
			self.data_dictionary = self.fetch_images_with_csv(images_path, data_frame, max_images)
			self.data_keys = list(self.data_dictionary.keys())
		else:
			print("No CSV")
			self.data_dictionary = None

	def set_batch_size(self, new_size):
		''' Responsible for setting a new batch size

			Input:
				- new_size: int -- corresponds to the new batch size we want to assign
		'''
		self.batch_size = new_size

	def get_batch_size(self):
		''' Responsible for returning the batch size for the class

			Returns:
				- int -- corresponds to the batch size
		'''
		return self.batch_size

	def _sample_count(self):
		''' Returns how many samples are currently loaded (0 when nothing is loaded) '''
		if self.data_dictionary:
			return len(self.data_keys)
		if self.images is None:
			return 0
		return len(self.images)

	def get_total_batches(self):
		''' Responsible for returning how many batches of data are in our dataset

			Only full batches are counted; a trailing partial batch is dropped.

			Returns:
				- int -- number of full batches, or 0 when the batch size is not positive
		'''
		if self.batch_size <= 0:
			return 0
		return self._sample_count() // self.batch_size

	
	def get_next_batch(self):
		'''	Responsible for batching the data arrays and returning them
		
			Advances an internal cursor and wraps around to the first batch once
			every full batch has been served.

			Returns: 
				label_batch: arr -- batch of labels for the associated image
				image_batch: arr -- batch of images for the associated labels
		'''
		start_pos = self.batch_size * self.current_batch
		# Parenthesised so the window spans a whole batch, not a single element.
		end_pos = self.batch_size * (self.current_batch + 1)

		label_batch = []
		image_batch = []
		if not self.data_dictionary:
			label_batch = self.labels[start_pos:end_pos]
			image_batch = self.images[start_pos:end_pos]
		else:
			label_batch_keys = self.data_keys[start_pos:end_pos]
			for key in label_batch_keys:
				image_batch.append(np.resize(np.array(self.data_dictionary[key]['image_arr']), (512, 512, 1)))
				label_batch.append(self.data_dictionary[key]['labels'])

		# Recomputed on every call because the data or the batch size may have
		# changed since the last one.
		self.total_batches = self.get_total_batches()

		# Reset the current batch once we've iterated through all of our data
		self.current_batch += 1
		if self.current_batch >= self.total_batches:
			self.current_batch = 0

		return label_batch, image_batch
		
	def fetch_data(self, path_to_csv):
		''' Loads file names and labels from a CSV via import_labels_from_csv
		
			Input:
				- path_to_csv: String -- path to the CSV

			Assigns:
				self.labels
				self.images -- image file names as read from the CSV
		'''
		images_arr, labels_arr = self.import_labels_from_csv(path_to_csv)
		self.images = images_arr
		self.labels = labels_arr
		
	def import_labels_from_csv(self, path):
		''' Handles opening a CSV of data and reading in the information to match
			the image with the label.
			
			Input: 
				- path: String -- path to the CSV; it must contain the columns
				  'file_name' and 'has_tbi'
			
			Returns:
				- images: list -- values of the 'file_name' column
				- labels: list -- values of the 'has_tbi' column
		'''
		csv_dataframe = pd.read_csv(path)
		images = list(csv_dataframe['file_name'])
		labels = list(csv_dataframe['has_tbi'])
		return images, labels

	def open_csv(self, path):
		''' Handles opening a labels CSV and returning the dataframe

			Input:
				- path: String -- path to CSV

			Returns:
				- csv_dataframe: pandas dataframe for labels

		'''
		csv_dataframe = pd.read_csv(path)
		return csv_dataframe

	def fetch_images_with_csv(self, path, dataframe, max_images=2):
		''' Handles fetching images from a filepath and constructs a dictionary with their labels

			For each row, the first column is used as the image id and the file
			'<path>/<id>_blue.png' is read. The second column is split on spaces and
			only its first entry is kept as the label.

			Input:
				- path: String -- path to data folder
				- dataframe: pandas dataframe
				- max_images: int or None -- maximum number of rows to load, taken from the
				  top of the dataframe. The default of 2 keeps the original debugging
				  limit; pass None to load every row (expect high memory use, since
				  each image is stored as a Python list of pixels).

			Returns:
				- Dictionary of data structured as:
				{
					image_name : {
						image_arr: [flat list of pixel values],
						labels: [array holding the first label]
					}
				}
		'''
		if max_images is None:
			rows = dataframe
		else:
			rows = dataframe.head(max_images)

		data_dictionary = {}
		for _, row in tqdm(rows.iterrows(), total=len(rows)):
			image_id = row.iloc[0]
			image_path = path + '/' + image_id + '_blue.png'
			# Context manager closes the file handle once the pixels are copied out.
			with Image.open(image_path) as image_file:
				pixels = list(image_file.getdata())
			first_label = row.iloc[1].split(' ')[0]
			data_dictionary[image_id] = {
				'image_arr': pixels,
				'labels': np.resize(np.array(first_label), (1)),
			}
		return data_dictionary



In [0]:

class BSSCS_UNET:
	''' TensorFlow 1.x UNET-style convolutional network followed by a small dense classifier.

		Typical use: construct, call train_unet(), then call test_unet() with
		self.output_tensor (or another tensor of the trained graph).
	'''
	def __init__(self, iterations, batch_size, data_class, labels_shape=[None, 1], learning_rate=0.001):
		''' Stores the training configuration

			Parameters:
				- iterations: int -- number of outer training passes
				- batch_size: int -- number of get_next_batch() calls made per pass
				- data_class: object exposing get_next_batch() -> (labels, images)
				- labels_shape: list -- shape of the labels placeholder
				- learning_rate: float -- learning rate for the Adam optimizer
		'''
		self.learning_rate = learning_rate
		self.iterations = iterations
		self.batch_size = batch_size
		self.data_class = data_class
		# Copied so instances never share (or mutate) the default list.
		self.labels_shape = list(labels_shape)

		# Populated by train_unet so test_unet can reuse the trained graph.
		self.session = None
		self.input_placeholder = None
		self.output_tensor = None


	def generate_unet_arch(self, input):
		''' Handles generating a TF implementation of a UNET following the architecture discussed in
		    "U-Net: Convolutional Networks for Biomedical Image Segmentation" by Ronneberger, Fischer, and Brox

		    The architecture for the UNET is not ours and all accreditation goes to Olaf Ronneberger, Philipp Fischer, and Thomas Brox.
		    We are not claiming any ownership for the architecture.

		    tf.layers.conv2d docs: https://www.tensorflow.org/api_docs/python/tf/layers/conv2d
			tf.layers.conv2d_transpose for up convolutions: https://www.tensorflow.org/api_docs/python/tf/layers/conv2d_transpose
			tf.concat for the copy and crop methods: https://www.tensorflow.org/api_docs/python/tf/concat
			tf.slice for cropping the tensors: https://www.tensorflow.org/api_docs/python/tf/slice

			Quick guide on cropping a tensor:

			A tensor cannot be cropped as if it were an image; it has to be sliced with tf.slice.
			Each contracting-path output is sliced, starting at the origin, down to the spatial size of
			the up-convolution it is concatenated with. The slice size is [-1, size_x, size_y, -1]; the
			-1 entries keep every remaining element of that dimension (the batch and the filters).
			From the TF docs: "If size[i] is -1, all remaining elements in dimension i are included in
			the slice."

			Once cropped, the two tensors are concatenated along the filter axis, giving a tensor of
			shape [Batch, Img_X, Img_Y, Filters_A + Filters_B], which feeds the next convolution.

			NOTE(review): every pooling layer uses a 2x2 window with stride 1 and VALID padding, so it
			trims one pixel per spatial dimension instead of halving it, and every transposed
			convolution uses stride 1, so nothing is upsampled. The paper uses stride 2 for both --
			confirm whether stride 1 is intentional.

			Parameters:
				- input -- image tensor of shape [Batch, Img_X, Img_Y, Channels]

			Returns:
				- Tensor -- the final 2-filter layer reshaped to [-1, 1016]
		'''
		# first block in UNET --> Concat with the final block
		convolution_layer_1 = tf.layers.conv2d(inputs=input, filters=64, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_2 = tf.layers.conv2d(inputs=convolution_layer_1, filters=64, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		max_pooling_layer_1 = tf.layers.max_pooling2d(inputs=convolution_layer_2, pool_size=[2, 2], strides=1, padding="VALID")

		# second block in UNET --> Concat with second to final block
		convolution_layer_3 = tf.layers.conv2d(inputs=max_pooling_layer_1, filters=128, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_4 = tf.layers.conv2d(inputs=convolution_layer_3, filters=128, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		max_pooling_layer_2 = tf.layers.max_pooling2d(inputs=convolution_layer_4, pool_size=[2, 2], strides=1, padding="VALID")

		# third block in UNET --> Concat with third from final block
		convolution_layer_5 = tf.layers.conv2d(inputs=max_pooling_layer_2, filters=256, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_6 = tf.layers.conv2d(inputs=convolution_layer_5, filters=256, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		max_pooling_layer_3 = tf.layers.max_pooling2d(inputs=convolution_layer_6, pool_size=[2, 2], strides=1, padding="VALID")

		# fourth block in UNET --> Concat with fourth from final block
		convolution_layer_7 = tf.layers.conv2d(inputs=max_pooling_layer_3, filters=512, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_8 = tf.layers.conv2d(inputs=convolution_layer_7, filters=512, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		max_pooling_layer_4 = tf.layers.max_pooling2d(inputs=convolution_layer_8, pool_size=[2, 2], strides=1, padding="VALID")

		# middle UNET block
		convolution_layer_9 = tf.layers.conv2d(inputs=max_pooling_layer_4, filters=1024, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_10 = tf.layers.conv2d(inputs=convolution_layer_9, filters=1024, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_up_1 = tf.layers.conv2d_transpose(inputs=convolution_layer_10, filters=1024, kernel_size=[2, 2], strides=1, padding="SAME")
		
		# fourth from last
		cropped_layer_8 = tf.slice(convolution_layer_8, [0, 0, 0, 0], [-1, convolution_up_1.shape[1], convolution_up_1.shape[2], -1])
		# axis=3 is the filter axis: [Batch_Size, Image_X, Image_Y, (Filters_Conv_Up_1 + Filters_Conv_8)]
		concat_layer_1 = tf.concat([convolution_up_1, cropped_layer_8], axis=3)
		convolution_layer_11 = tf.layers.conv2d(inputs=concat_layer_1, filters=512, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_12 = tf.layers.conv2d(inputs=convolution_layer_11, filters=512, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_up_2 = tf.layers.conv2d_transpose(inputs=convolution_layer_12, filters=512, kernel_size=[2, 2], strides=1, padding="SAME")

		
		# third from last
		# The convolution after each concat must consume the concatenated tensor;
		# otherwise the skip connection is built but never used.
		cropped_layer_6 = tf.slice(convolution_layer_6, [0, 0, 0, 0], [-1, convolution_up_2.shape[1], convolution_up_2.shape[2], -1])
		concat_layer_2 = tf.concat([convolution_up_2, cropped_layer_6], axis=3)
		convolution_layer_13 = tf.layers.conv2d(inputs=concat_layer_2, filters=256, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_14 = tf.layers.conv2d(inputs=convolution_layer_13, filters=256, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_up_3 = tf.layers.conv2d_transpose(inputs=convolution_layer_14, filters=256, kernel_size=[2, 2], strides=1, padding="SAME")

		# second from last
		cropped_layer_4 = tf.slice(convolution_layer_4, [0, 0, 0, 0], [-1, convolution_up_3.shape[1], convolution_up_3.shape[2], -1])
		concat_layer_3 = tf.concat([convolution_up_3, cropped_layer_4], axis=3)
		convolution_layer_15 = tf.layers.conv2d(inputs=concat_layer_3, filters=256, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_16 = tf.layers.conv2d(inputs=convolution_layer_15, filters=128, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_up_4 = tf.layers.conv2d_transpose(inputs=convolution_layer_16, filters=128, kernel_size=[2, 2], strides=1, padding="SAME")

		# last block
		cropped_layer_2 = tf.slice(convolution_layer_2, [0, 0, 0, 0], [-1, convolution_up_4.shape[1], convolution_up_4.shape[2], -1])
		concat_layer_4 = tf.concat([convolution_up_4, cropped_layer_2], axis=3)
		convolution_layer_17 = tf.layers.conv2d(inputs=concat_layer_4, filters=128, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_18 = tf.layers.conv2d(inputs=convolution_layer_17, filters=64, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_layer_19 = tf.layers.conv2d(inputs=convolution_layer_18, filters=64, kernel_size=[3, 3], strides=1, padding="SAME", activation=tf.nn.relu)
		convolution_up_5 = tf.layers.conv2d_transpose(inputs=convolution_layer_19, filters=2, kernel_size=[1, 1], strides=1, padding="SAME")
		
		# NOTE(review): reshaping to rows of 1016 values folds spatial positions into the
		# leading axis, so that axis is no longer the batch size -- confirm this lines up
		# with the labels fed in train_unet.
		flattened = tf.reshape(convolution_up_5, [-1, 1016])
		return flattened

	def create_regressor(self, input): 
		''' Handles creating the regressor for the UNET classification

			One ReLU dense layer of 1016 units followed by a linear layer of 2 units.

			Parameters:
				- input -- input layer (flattened layer from UNET) 
			Returns:
			 	- Tensor -- last layer in the regressor (2 un-activated outputs per row)
		'''
		reg_input = tf.layers.dense(inputs=input, units=1016, activation=tf.nn.relu)
		reg_out = tf.layers.dense(inputs=reg_input, units=2)
		return reg_out

	def create_loss(self, input, labels):
		''' Handles creating a loss function and returning it to the optimizer

			Parameters:
			 	- Input: The final layer in the graph we are computing the loss for (logits)
			 	- Labels: The labels for the batch we are computing the loss for

			 Returns:
			 	- Tensor of per-row softmax cross-entropy values (not reduced)

			TensorFlow documentation: 
			https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits_v2
		'''
		return tf.nn.softmax_cross_entropy_with_logits_v2(logits=input, labels=labels)

	def create_optimizer(self, input, labels):
		''' Handles creating the training op that minimizes the mean cross-entropy loss

			Parameters:
			 	- Input: The final layer in the graph (logits)
			 	- Labels: The labels placeholder/tensor

			Returns:
			 	- Adam training operation built with self.learning_rate

			TensorFlow documentation: 
			https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
		'''
		loss = tf.reduce_mean(self.create_loss(input, labels))
		return tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)

	def train_unet(self):
		''' Handles training a UNET based off the data fed to it

			Builds the graph (UNET -> dense classifier -> loss/optimizer), opens a
			session and runs the optimizer on batches pulled from self.data_class.

			The session is kept open on self.session, and the input placeholder and
			classifier output are stored on self.input_placeholder / self.output_tensor,
			so test_unet can evaluate the trained variables. Call close_session() when
			finished.
		'''
		# Input size is fixed to single-channel 512x512 images.
		input_ph = tf.placeholder(tf.float32, shape=[None, 512, 512, 1])
		conv_input = self.generate_unet_arch(input_ph)
		classifier = self.create_regressor(conv_input)
		labels_placeholder = tf.placeholder(tf.float32, shape=self.labels_shape)
		optimizer = self.create_optimizer(classifier, labels_placeholder)
		# Reduced to a scalar so it can be reported as a single number.
		loss = tf.reduce_mean(self.create_loss(classifier, labels_placeholder))

		self.input_placeholder = input_ph
		self.output_tensor = classifier

		# Release a session left over from a previous training run.
		self.close_session()
		self.session = tf.Session()
		session = self.session
		session.run(tf.global_variables_initializer())

		for iteration in range(0, self.iterations): # counts for epochs -- or how many times we go through our data
			X_b, y_b = None, None
			# NOTE(review): this runs self.batch_size optimizer steps per pass rather than
			# one step per available batch -- confirm that is the intended meaning.
			for batch in range(0, self.batch_size):
				y_b, X_b = self.data_class.get_next_batch()
				session.run(optimizer, feed_dict={input_ph:X_b, labels_placeholder:y_b})
				
			# Report on the most recent batch; skipped when no batch was drawn.
			if iteration % 500 == 0 and X_b is not None:
				it_loss = session.run(loss, feed_dict={input_ph:X_b, labels_placeholder:y_b})
				print("Iteration {}: mean cross-entropy loss on last batch = {}".format(iteration, it_loss))
		

	def test_unet(self, graph_out, input_x):
		''' Runs a trained UNET through an evaluation/test phase to detect errors

			Uses the session and input placeholder created by train_unet, so the
			trained variable values are the ones being evaluated.

			Parameters:
				- graph_out: Tensor - The layer of the trained graph to evaluate (e.g. self.output_tensor)
				- input_x: image_arr  - Image array of shape [None, x, y, 1]

			Returns:
				- output: Returns the evaluated value of graph_out

			Raises:
				- RuntimeError: if train_unet has not been run (no session/placeholder yet)
		'''
		if self.session is None or self.input_placeholder is None:
			raise RuntimeError("train_unet() must be called before test_unet()")

		output = self.session.run(graph_out, feed_dict={self.input_placeholder:input_x})

		return output

	def close_session(self):
		''' Closes the TensorFlow session kept open by train_unet, if there is one
		'''
		if self.session is not None:
			self.session.close()
			self.session = None

	def save_graph(self):
		''' Saves a UNET graph
		'''
		# To-Do: Implement when deep regressor is finished
		return None

	def load_graph(self):
		''' Loads a UNET graph
		'''
		# To-Do: Implement when deep regressor is finished
		return None