# Abnormality Detection in Bone X-Ray Radiographs

# Preprocessing

Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from skimage.filters import unsharp_mask
import warnings
warnings.filterwarnings('ignore')
import re
import os

In [2]:
# Load the wrangled training split into a DataFrame.
train = pd.read_csv(filepath_or_buffer="wrangled_files/train.csv")
# Load the wrangled validation split the same way.
valid = pd.read_csv(filepath_or_buffer="wrangled_files/valid.csv")

In [3]:
# Remove the leftover index column that was saved into the CSV.
train.drop('Unnamed: 0', axis=1, inplace=True)
# Give the path/label columns the same names used for every split.
train.rename(
    {'train_path': 'path', 'train_labels': 'label'},
    axis='columns',
    inplace=True,
)
# Preview the first five rows of the training DataFrame.
train.head(5)

Unnamed: 0,path,label,body_part,study_type
0,MURA-v1.1/train/XR_SHOULDER/patient00001/study...,1.0,SHOULDER,study1
1,MURA-v1.1/train/XR_SHOULDER/patient00001/study...,1.0,SHOULDER,study1
2,MURA-v1.1/train/XR_SHOULDER/patient00001/study...,1.0,SHOULDER,study1
3,MURA-v1.1/train/XR_SHOULDER/patient00002/study...,1.0,SHOULDER,study1
4,MURA-v1.1/train/XR_SHOULDER/patient00002/study...,1.0,SHOULDER,study1


In [4]:
# Remove the leftover index column that was saved into the CSV.
valid.drop('Unnamed: 0', axis=1, inplace=True)
# Give the path/label columns the same names used for every split.
valid.rename(
    {'valid_path': 'path', 'valid_labels': 'label'},
    axis='columns',
    inplace=True,
)
# Preview the first five rows of the validation DataFrame.
valid.head(5)

Unnamed: 0,path,label,body_part,study_type
0,MURA-v1.1/valid/XR_WRIST/patient11185/study1_p...,1.0,WRIST,study1
1,MURA-v1.1/valid/XR_WRIST/patient11185/study1_p...,1.0,WRIST,study1
2,MURA-v1.1/valid/XR_WRIST/patient11185/study1_p...,1.0,WRIST,study1
3,MURA-v1.1/valid/XR_WRIST/patient11185/study1_p...,1.0,WRIST,study1
4,MURA-v1.1/valid/XR_WRIST/patient11186/study1_p...,1.0,WRIST,study1


In [5]:
# Show the current working directory. The bare name relies on the notebook
# resolving `pwd` as a command (presumably IPython automagic for `%pwd`).
pwd

'C:\\Users\\user\\Springboard\\Course\\Capstone - 2\\MURA-v1.1'

In [33]:
# Preprocessing pipeline: resize, enhance contrast, crop to content, and save.
def _crop_to_edges(image):
    """Return `image` cropped to the bounding box of its Canny edges.

    The image is returned unchanged when no edge pixels are detected.
    """
    # NOTE: edges are detected on the contrast-enhanced image directly. A
    # Gaussian blur used to be computed here but its result was never used,
    # so the dead computation was removed without changing which edges are found.
    edges = cv2.Canny(image, 100, 200)
    pts = np.argwhere(edges > 0)  # (row, col) coordinates of edge pixels
    if pts.size == 0:
        return image
    y1, x1 = pts.min(axis=0)
    y2, x2 = pts.max(axis=0)
    # max() is inclusive, so add 1 to keep the last edge row/column. This also
    # guarantees a non-empty crop when all edges lie on one row or column.
    return image[y1:y2 + 1, x1:x2 + 1]


def _output_location(src_path):
    """Map a source image path to ``(output_directory, output_file_path)``.

    Everything before the first ``/`` of `src_path` is replaced by
    ``gen_data/MURA-v1.1``; the remaining sub-directories and the file name
    are kept. Paths with few (or no) directory levels are handled as well.
    """
    parent, _, file_name = src_path.rpartition('/')
    _, _, sub_dirs = parent.partition('/')
    out_dir = 'gen_data/MURA-v1.1'
    if sub_dirs:
        out_dir = out_dir + '/' + sub_dirs
    return out_dir, out_dir + '/' + file_name


def preprocessing(df: pd.DataFrame) -> None:
    """Preprocess every image listed in ``df['path']`` and save it under ``gen_data/``.

    Each image is read, resized to 224x224, converted to grayscale,
    contrast-enhanced with CLAHE, cropped to the bounding box of its Canny
    edges and written to ``gen_data/MURA-v1.1/<sub-directories>/<file name>``.

    Images that cannot be read or written are skipped with a logged warning
    instead of aborting the whole batch.

    Args:
        df: DataFrame with a ``path`` column holding image file paths that
            use ``/`` as separator.
    """
    # Local import keeps this block self-contained; the global
    # warnings filter in this file does not silence logging output.
    import logging
    logger = logging.getLogger(__name__)

    # Iterate over the values so that any DataFrame index works, not only 0..n-1.
    paths = df['path']
    if paths.empty:
        return

    target_size = (224, 224)  # (width, height)
    # The CLAHE object does not depend on the image, so build it once.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    for img_path in paths:
        image = cv2.imread(img_path)
        if image is None:  # missing or unreadable file
            logger.warning("Skipping unreadable image: %s", img_path)
            continue

        image = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)

        # Increase contrast with Contrast Limited Adaptive Histogram Equalization.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = clahe.apply(image)

        image = _crop_to_edges(image)

        out_dir, final_path = _output_location(img_path)
        os.makedirs(out_dir, exist_ok=True)

        try:
            written = cv2.imwrite(final_path, image)
        except cv2.error as exc:
            logger.warning("Could not write %s: %s", final_path, exc)
            continue
        if not written:  # imwrite can also signal failure via its return value
            logger.warning("Could not write %s", final_path)
   


In [34]:
# Run the preprocessing pipeline on the training split first,
# then on the validation split.
for split_df in (train, valid):
    preprocessing(split_df)

In [35]:
# Save both DataFrames (index column included) next to the generated images.
train.to_csv(path_or_buf="gen_data/train.csv", index=True)
valid.to_csv(path_or_buf="gen_data/valid.csv", index=True)

# Modelling