In [8]:
import Utils
import copy
import torch
import warnings
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision

### Check Image Classification Data Count

In [2]:
from torch.utils.data import Dataset

# Classification With mask
class classification_mask_dataset(Dataset):
    def __init__(self, classification_dir, segmentation_dir, txt_COVID, txt_NonCOVID, transform=None, lateral_map=1,
                 min_seg=0.01):
        self.classification_dir = classification_dir
        self.segmentation_dir = segmentation_dir

        self.txt_path = [txt_COVID, txt_NonCOVID]
        self.classes = ['CT_COVID', 'CT_NonCOVID']
        self.num_cls = len(self.classes)

        self.img_list = []
        self.segment_list = []
        self.min_seg = min_seg

        for c in range(self.num_cls):
            # Classification List
            cls_list = [[os.path.join(self.classification_dir, self.classes[c], item), c] for item in
                        read_txt(self.txt_path[c])]
            self.img_list += cls_list
            # Segmentation List
            seg_list = [[os.path.join(self.segmentation_dir, self.classes[c], "lateral_map" + str(lateral_map),
                                      item.replace('.jpg', '.png')), c] for item in read_txt(self.txt_path[c])]
            self.segment_list += seg_list

        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Original Data
        img_path = self.img_list[idx][0]
        image = Image.open(img_path).convert('RGB')
        image = image.resize((256, 256))

        # Segmentation Data
        seg_path = self.segment_list[idx][0]
        seg = Image.open(seg_path).convert('RGB')
        seg = seg.rotate(-90, expand=True)
        seg = seg.resize((256, 256))

        # Mask with Original Data
        # Step 1 => Segmentation Min-Max Normalization + Min Value(Hyperparameter)
        seg_np = np.array(seg)
        seg_mask = (seg_np - seg_np.min()) / (seg_np.max() - seg_np.min()) + self.min_seg
        # Clip max = 1
        seg_mask = np.clip(seg_mask, 0, 1)

        # Step 2 => Original Data with seg_mask
        image_with_mask = np.multiply(image, seg_mask)

        # Step 3 => Change Numpy Dtype => For Using Image Preprocessing
        image_with_mask = Image.fromarray(np.uint8(image_with_mask))

        if self.transform:
            image_with_mask = self.transform(image_with_mask)
            # image = self.transform(image)

        sample = {'img': image_with_mask,
                  'label': int(self.img_list[idx][1])}
        return sample

In [16]:
import os

def read_txt(txt_path):
    with open(txt_path) as f:
        lines = f.readlines()
    txt_data = [line.strip() for line in lines]
    return txt_data

In [53]:
len(os.listdir(os.path.join(audio_dir, audio_classes[0])))

136

In [54]:
classification_dir = './dataset/image/classfication/'
segmentation_dir = './dataset/image/classfication/Segmentation/'
audio_dir = './dataset/audio/preprocess/train/'

txt_path = ['./dataset/image/classfication/Data-split/COVID/trainCT_COVID.txt', 
                 './dataset/image/classfication/Data-split/NonCOVID/trainCT_NonCOVID.txt']

classes = ['CT_COVID', 'CT_NonCOVID']
audio_classes = ['pos', 'neg']
num_cls = len(classes)

img_list = []
segment_list = []
audio_list = []

for c in range(num_cls):
    # Classification List
    cls_list = [[os.path.join(classification_dir, classes[c], item), c] for item in read_txt(txt_path[c])]
    img_list += cls_list
    # Segmentation List
    seg_list = [[os.path.join(segmentation_dir, classes[c], "lateral_map" + str(3),
                              item.replace('.jpg', '.png')), c] for item in read_txt(txt_path[c])]
    segment_list += seg_list
    # Audio List
    a_list = [[item, c] for item in os.listdir(os.path.join(audio_dir, audio_classes[c]))]
    audio_list += a_list

In [119]:
img_list = np.array(img_list)
pos_index = (img_list[:,1]=='0')
neg_index = (img_list[:,1]=='1')
pos_img_list = img_list[:, 0][pos_index]
neg_img_list = img_list[:, 0][neg_index]

In [118]:
img_list[:,0][neg_index]

array(['./dataset/image/classfication/CT_NonCOVID/14.png',
       './dataset/image/classfication/CT_NonCOVID/15.png',
       './dataset/image/classfication/CT_NonCOVID/33.png',
       './dataset/image/classfication/CT_NonCOVID/43.png',
       './dataset/image/classfication/CT_NonCOVID/44.png',
       './dataset/image/classfication/CT_NonCOVID/59.png',
       './dataset/image/classfication/CT_NonCOVID/81.png',
       './dataset/image/classfication/CT_NonCOVID/82.png',
       './dataset/image/classfication/CT_NonCOVID/102.png',
       './dataset/image/classfication/CT_NonCOVID/103.png',
       './dataset/image/classfication/CT_NonCOVID/104.png',
       './dataset/image/classfication/CT_NonCOVID/114.png',
       './dataset/image/classfication/CT_NonCOVID/115.png',
       './dataset/image/classfication/CT_NonCOVID/116.png',
       './dataset/image/classfication/CT_NonCOVID/117.png',
       './dataset/image/classfication/CT_NonCOVID/118.png',
       './dataset/image/classfication/CT_NonCOVI

In [114]:
pos_img_list

array([], shape=(191, 0), dtype='<U74')

In [112]:
pos_index

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [None]:
pos_index = (img_list[:,1]==0)
neg_index = (img_list[:,1]==1)

In [98]:
aa_list = np.array(audio_list)[:,1]

In [101]:
aa_list == '0'

array([ True,  True,  True, ..., False, False, False])

In [97]:
aa_list.view(dtype=int)

ValueError: To change to a dtype of a different size, the array must be C-contiguous

In [82]:
for a in np.array(audio_list)[:,1]:
    print(a)

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [83]:
np.array(audio_list)[:,1] < 1

TypeError: '<' not supported between instances of 'numpy.ndarray' and 'int'

In [76]:
np.where(np.array(audio_list)[:,1])

(array([   0,    1,    2, ..., 3240, 3241, 3242]),)

In [29]:
np.array(img_list)[:,0]

array(['./dataset/image/classfication/CT_COVID/2020.01.24.919183-p27-132.png',
       './dataset/image/classfication/CT_COVID/2020.01.24.919183-p27-133.png',
       './dataset/image/classfication/CT_COVID/2020.01.24.919183-p27-134.png',
       './dataset/image/classfication/CT_COVID/2020.01.24.919183-p27-135.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%0.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%1.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%10.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%11.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%12.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%13.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%14.png',
       './dataset/image/classfication/CT_COVID/2020.02.10.20021584-p6-52%15.png',
       './dataset/image/classf

In [22]:
segment_list

[['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.01.24.919183-p27-132.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.01.24.919183-p27-133.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.01.24.919183-p27-134.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.01.24.919183-p27-135.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.02.10.20021584-p6-52%0.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.02.10.20021584-p6-52%1.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.02.10.20021584-p6-52%10.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.02.10.20021584-p6-52%11.png',
  0],
 ['./dataset/image/classfication/Segmentation/CT_COVID/lateral_map3/2020.02.10.20021584-p6-52%12.png',
  0],
 ['./dataset/image/classfication/

In [3]:
trainset = Utils.classification_mask_dataset(classification_dir='./dataset/image/classfication/',
                                           segmentation_dir='./dataset/image/classfication/Segmentation/',
                                           txt_COVID='./dataset/image/classfication/Data-split/COVID/trainCT_COVID.txt',
                                           txt_NonCOVID='./dataset/image/classfication/Data-split/NonCOVID/trainCT_NonCOVID.txt',
                                           lateral_map=3, min_seg=0.8)

valset = Utils.classification_mask_dataset(classification_dir='./dataset/image/classfication/',
                                         segmentation_dir='./dataset/image/classfication/Segmentation/',
                                         txt_COVID='./dataset/image/classfication/Data-split/COVID/valCT_COVID.txt',
                                         txt_NonCOVID='./dataset/image/classfication/Data-split/NonCOVID/valCT_NonCOVID.txt',
                                         lateral_map=3, min_seg=0.8)

testset = Utils.classification_mask_dataset(classification_dir='./dataset/image/classfication/',
                                                segmentation_dir='./dataset/image/classfication/Segmentation/',
                                                txt_COVID='./dataset/image/classfication/Data-split/COVID/testCT_COVID.txt',
                                                txt_NonCOVID='./dataset/image/classfication/Data-split/NonCOVID/testCT_NonCOVID.txt',
                                                lateral_map=3, min_seg=0.8)

In [4]:
def check_image_dataset(dataset):
    covid_count = 0
    non_covid_count = 0

    for t in dataset:
        if t['label'] == 0:
            covid_count+=1
        else:
            non_covid_count+=1
            
    print('Num of COVID: ', covid_count)
    print('Num of Non COVID: ', non_covid_count)

In [6]:
print('Image Train Dataset')
check_image_dataset(trainset)
print('Image Validation Dataset')
check_image_dataset(valset)
print('Image Test Dataset')
check_image_dataset(testset)

Image Train Dataset
Num of COVID:  191
Num of Non COVID:  234
Image Validation Dataset
Num of COVID:  60
Num of Non COVID:  58
Image Test Dataset
Num of COVID:  98
Num of Non COVID:  105


### Check Audio Classification Data Count

In [60]:
# For Dataset
train_set = torchvision.datasets.ImageFolder(root='./dataset/audio/preprocess/train')
val_set = torchvision.datasets.ImageFolder(root='./dataset/audio/preprocess/validation')
test_set = torchvision.datasets.ImageFolder(root='./dataset/audio/preprocess/test')

In [61]:
def check_audio_dataset(dataset):
    covid_count = 0
    non_covid_count = 0

    for i,x in dataset:
        if x == 0:
            non_covid_count+=1
        else:
            covid_count+=1
            
    print('Num of COVID: ', covid_count)
    print('Num of Non COVID: ', non_covid_count)

In [62]:
print('Audio Train Dataset')
check_audio_dataset(train_set)
print('Audio Validation Dataset')
check_audio_dataset(val_set)
print('Audio Test Dataset')
check_audio_dataset(test_set)

Audio Train Dataset
Num of COVID:  161
Num of Non COVID:  2599
Audio Validation Dataset
Num of COVID:  15
Num of Non COVID:  15
Audio Test Dataset
Num of COVID:  20
Num of Non COVID:  20


In [None]:
aa

In [None]:
covid_count = 0
non_covid_count = 0

print('Image Train Dataset')
for t in trainset:
    if t['label'] == 0:
        pos_count+=1
    else:
        neg_count+=1

In [None]:
pos_count = 0
neg_count = 0

for t in trainset:
    if t['label'] == 0:
        pos_count+=1
    else:
        neg_count+=1

In [None]:
print('Num of neg: ', neg_count)
print('Num of pos: ', pos_count)

In [None]:
t['label']

In [None]:
import torchvision
import os

# For Dataset
train_set = torchvision.datasets.ImageFolder(root='./dataset/audio/preprocess/train')
val_set = torchvision.datasets.ImageFolder(root='./dataset/audio/spectrum/val')
test_set = torchvision.datasets.ImageFolder(root='./dataset/audio/spectrum/test')
    
# For OverSampling
num_neg = len(os.listdir('./dataset/audio/preprocess/train/neg/'))
num_pos = len(os.listdir('./dataset/audio/preprocess/train/pos/'))

In [None]:
print('Num of neg: ', num_neg)
print('Num of pos: ', num_pos)

In [None]:
zero_count = 0
one_count = 0

for i,x in train_set:
    if x == 0:
        zero_count+=1
    else:
        one_count+=1

In [None]:
print('Zero Count: ', zero_countro_count)
print('One Count: ', one_count)

In [None]:
x