In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm

%matplotlib inline

In [None]:
# Mount Google Drive at /content/drive; every data path used in this notebook
# lives under that mount point. The google.colab module is only importable
# inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Work from the project folder so that relative file names (e.g. the
# np.load('X_train_bone.npy') call further down) resolve inside it.
os.chdir('/content/drive/MyDrive/WIKM')

### Convert Images to NumPy Arrays

In [None]:
def Dataset_loader(DIR, RESIZE, sigmaX=10, extensions=(".jpg",)):
    """Load the images found directly inside ``DIR`` as resized RGB arrays.

    Parameters
    ----------
    DIR : str
        Directory whose entries are scanned (sub-directories are not walked).
    RESIZE : int
        Target width and height in pixels; each image is resized to
        ``(RESIZE, RESIZE)`` with ``cv2.resize``.
    sigmaX : int, optional
        Not used by this function; kept only so existing calls keep working.
    extensions : str or iterable of str, optional
        File extension(s), including the leading dot, that should be loaded.
        Compared case-insensitively, so ``".jpg"`` also matches ``".JPG"``.
        Defaults to ``(".jpg",)``.

    Returns
    -------
    list of numpy.ndarray
        One array per matching file, ordered by sorted file name. The list is
        empty when no file in ``DIR`` has a matching extension.
    """
    # Accept a bare string such as ".png" as well as a collection of extensions.
    if isinstance(extensions, str):
        extensions = (extensions,)
    wanted = tuple(ext.lower() for ext in extensions)

    IMG = []
    # os.listdir returns entries in arbitrary order; sorting makes the order of
    # the returned images the same on every run.
    for IMAGE_NAME in tqdm(sorted(os.listdir(DIR))):
        PATH = os.path.join(DIR, IMAGE_NAME)
        _, ftype = os.path.splitext(PATH)
        if ftype.lower() not in wanted:
            continue
        # The context manager releases the file handle as soon as the pixels
        # have been copied out by convert().
        with Image.open(PATH) as handle:
            img = np.asarray(handle.convert("RGB"))
        # cv2.resize already returns a fresh ndarray, so no extra copy is needed.
        IMG.append(cv2.resize(img, (RESIZE, RESIZE)))
    return IMG

In [None]:
# Bone dataset: one image array per class folder, every image resized to 224x224.

NonTumor_train = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/OsteosarcomaData/Non-Tumor', RESIZE=224)
)
NonViableTumor_train = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/OsteosarcomaData/Non-Viable-Tumor', RESIZE=224)
)
Viable_train = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/OsteosarcomaData/Viable', RESIZE=224)
)

In [None]:
# Lung dataset: one image array per class folder for the Train and Test splits,
# every image resized to 224x224.
# NOTE(review): the folder name mentions PNG, while Dataset_loader as called here
# only keeps files whose extension is ".jpg" - confirm these folders really hold
# .jpg files, otherwise the arrays below come back empty.

insitu_train = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Train/in_situ', RESIZE=224)
)
invasive_train = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Train/invasive', RESIZE=224)
)
normal_train = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Train/normal', RESIZE=224)
)

insitu_test = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Test/in_situ', RESIZE=224)
)
invasive_test = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Test/invasive', RESIZE=224)
)
normal_test = np.asarray(
    Dataset_loader(DIR='/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Test/normal', RESIZE=224)
)

### Creating Labels for Bone

In [None]:
# Class ids for the bone data: 0 = Non-Tumor, 1 = Non-Viable-Tumor, 2 = Viable.
# Each label vector has one entry per image of its class.
NonTumor_train_label = np.zeros(NonTumor_train.shape[0])
NonViableTumor_train_label = np.ones(NonViableTumor_train.shape[0])
Viable_train_label = np.full(Viable_train.shape[0], 2)

# Stack the three classes along the sample axis; images and labels use the
# same class order so row i of X_train belongs to entry i of Y_train.
X_train = np.concatenate([NonTumor_train, NonViableTumor_train, Viable_train])
Y_train = np.concatenate(
    [NonTumor_train_label, NonViableTumor_train_label, Viable_train_label]
)

In [None]:
# Persist the merged bone arrays to Drive (np.save adds the ".npy" extension).

np.save(file='/content/drive/MyDrive/WIKM/X_train_bone', arr=X_train)
np.save(file='/content/drive/MyDrive/WIKM/Y_train_bone', arr=Y_train)

In [None]:
# Read the bone arrays back; the relative names resolve against the current
# working directory.

X_train, Y_train = (
    np.load(npy_name) for npy_name in ('X_train_bone.npy', 'Y_train_bone.npy')
)

In [None]:
# Shuffle train data
# A seeded generator is used so the shuffled order is the same on every run;
# images and labels are reordered with the same permutation so they stay aligned.
assert X_train.shape[0] == Y_train.shape[0], "images and labels differ in length"
s = np.random.default_rng(11).permutation(X_train.shape[0])
X_train = X_train[s]
Y_train = Y_train[s]

### Creating Labels for Lung

In [None]:
# Class ids for the lung data: 0 = normal, 1 = invasive, 2 = in_situ.
# Each label vector has one entry per image of its class.
normal_train_label = np.zeros(normal_train.shape[0])
invasive_train_label = np.ones(invasive_train.shape[0])
insitu_train_label = np.full(insitu_train.shape[0], 2)

# Same encoding for the held-out test images.
normal_test_label = np.zeros(normal_test.shape[0])
invasive_test_label = np.ones(invasive_test.shape[0])
insitu_test_label = np.full(insitu_test.shape[0], 2)

In [None]:
# Stack the three classes along the sample axis. Images and labels are listed
# in the same class order (normal, invasive, in_situ) so rows stay aligned.
X_train = np.concatenate([normal_train, invasive_train, insitu_train])
Y_train = np.concatenate(
    [normal_train_label, invasive_train_label, insitu_train_label]
)

X_test = np.concatenate([normal_test, invasive_test, insitu_test])
Y_test = np.concatenate(
    [normal_test_label, invasive_test_label, insitu_test_label]
)

In [None]:
#Save np array
# The files carry a "_lung" suffix so that they are exactly the files the lung
# load cell reads back (X_train_lung.npy, ...), mirroring the "_bone" suffix
# used for the bone arrays. np.save appends the ".npy" extension itself.

np.save('/content/drive/MyDrive/WIKM/X_train_lung', X_train)
np.save('/content/drive/MyDrive/WIKM/Y_train_lung', Y_train)
np.save('/content/drive/MyDrive/WIKM/X_test_lung', X_test)
np.save('/content/drive/MyDrive/WIKM/Y_test_lung', Y_test)

In [None]:
# Read the lung arrays back; the relative names resolve against the current
# working directory.

X_train, Y_train, X_test, Y_test = (
    np.load(npy_name)
    for npy_name in (
        'X_train_lung.npy',
        'Y_train_lung.npy',
        'X_test_lung.npy',
        'Y_test_lung.npy',
    )
)

In [None]:
# Shuffle train data
# A seeded generator is used so the shuffled order is the same on every run;
# images and labels are reordered with the same permutation so they stay aligned.

assert X_train.shape[0] == Y_train.shape[0], "images and labels differ in length"
s = np.random.default_rng(11).permutation(X_train.shape[0])
X_train = X_train[s]
Y_train = Y_train[s]

In [None]:
# Shuffle test data
# A seeded generator is used so the shuffled order is the same on every run;
# images and labels are reordered with the same permutation so they stay aligned.

assert X_test.shape[0] == Y_test.shape[0], "images and labels differ in length"
s = np.random.default_rng(11).permutation(X_test.shape[0])
X_test = X_test[s]
Y_test = Y_test[s]

### Train and Validation Split

In [None]:
# Hold out 20% of the training samples for validation; the fixed random_state
# makes the split repeatable for a given input order.
split_options = dict(test_size=0.2, random_state=11)
x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, **split_options)

### Display some images

In [None]:
# Display the first rows*columns (12) training images with their class names.
# The names are indexed by the lung label values (0 = normal, 1 = invasive,
# 2 = in_situ).
# NOTE(review): when the bone arrays are loaded instead, labels 0/1/2 stand for
# Non-Tumor / Non-Viable-Tumor / Viable, so these titles would be wrong - confirm
# which dataset this cell is meant for.
CLASS_NAMES = ('normal', 'invasive', 'in_situ')
columns = 4
rows = 3

fig, axes = plt.subplots(rows, columns, figsize=(15, 15))
for i, ax in enumerate(axes.flat):
    if i >= len(x_train):
        # Fewer images than grid cells: leave the remaining panels blank.
        ax.axis('off')
        continue
    # Titles come from y_train, the labels that belong to x_train after the split.
    label = y_train[i]
    # Labels are scalar class ids here; argmax is only meaningful if they have
    # been one-hot encoded, so handle both representations.
    class_index = int(np.argmax(label)) if np.ndim(label) > 0 else int(label)
    ax.set_title(CLASS_NAMES[class_index])
    ax.imshow(x_train[i], interpolation='nearest')
plt.show()