# Rimozione di artefatti in un dataset di mammografie

In [1]:
import cv2
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

# Ribaltamento delle immagini

In [2]:
# Load the anomaly annotations, indexed by image reference name (REFNUM).
anomalies = pd.read_csv('data/Info.txt', sep=' ', index_col='REFNUM')

# Convert every image to PNG; odd-numbered images are mirrored horizontally
# so that all images end up with the same orientation.
for i in range(1, 323):
    ref = 'mdb{:03d}'.format(i)
    img = cv2.imread('data/all-mias/mdb{:03d}.pgm'.format(i), cv2.IMREAD_GRAYSCALE)
    out_path = 'data/ed/mdb{:03d}.png'.format(i)

    if i % 2 == 0:
        # Even-numbered images are saved unchanged.
        cv2.imwrite(out_path, img)
        continue

    # flipCode=1 flips around the vertical axis (left <-> right).
    cv2.imwrite(out_path, cv2.flip(img, 1))
    # Mirror the annotated X coordinate as well.
    # NOTE(review): 1024 is presumably the image width in pixels -- confirm.
    anomalies.loc[ref, 'X'] = 1024 - anomalies.loc[ref, 'X']

# Persist the updated annotations next to the cleaned images.
anomalies.to_csv('data/text_removed/Info.csv')

# Rimozione del testo

In [3]:
def removeTexts(img):
    """Return a copy of ``img`` with detected text blanked in the top rows.

    Only the region ``img[0:400, 400:]`` is analysed and modified; the input
    array itself is left untouched.

    Parameters:
        img: image array; presumably a single-channel 8-bit grayscale image,
            as loaded by the callers with ``cv2.IMREAD_GRAYSCALE`` -- confirm.

    Returns:
        A new array of the same shape as ``img`` in which the pixels of the
        analysed region that fall inside the detected shapes are set to 0.
    """

    def ellipse(size):
        # Elliptical structuring element of the given square size.
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))

    # Step 1: work on a copy and take the region that is searched for text.
    result = np.copy(img)
    roi = result[0:400, 400:]

    # Step 2: subtract a heavily blurred version so that only the fine
    # detail (the difference from the smooth background) remains.
    detail = cv2.absdiff(roi, cv2.GaussianBlur(roi, (63, 63), 0))

    # Step 3: opening to remove small bright specks.
    detail = cv2.morphologyEx(detail, cv2.MORPH_OPEN, ellipse(3), iterations=1)

    # Step 4: edge detection.
    edges = cv2.Canny(detail, 20, 50, apertureSize=3, L2gradient=True)

    # Step 5: dilation to close the gaps between edge fragments.
    thick_edges = cv2.dilate(edges, ellipse(5), iterations=2)

    # Steps 6-7: fill the external contours, then their convex hulls, so each
    # detected shape becomes one solid blob.
    outlines, _ = cv2.findContours(thick_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    hulls = [cv2.convexHull(outline) for outline in outlines]
    text_mask = np.zeros_like(edges)
    for shapes in (outlines, hulls):
        cv2.drawContours(text_mask, shapes, -1, 255, thickness=cv2.FILLED)

    # Step 8: invert to get the "keep" mask and open it to drop residues.
    keep_mask = cv2.morphologyEx(255 - text_mask, cv2.MORPH_OPEN, ellipse(9), iterations=1)

    # Step 9: keep only the unmasked pixels of the analysed region.
    result[0:400, 400:] = cv2.bitwise_and(roi, roi, mask=keep_mask)
    return result

# Post Processing

In [4]:
def postProcessing(edited, original):
    """Repair the region ``[0:512, 320:]`` of ``edited`` using ``original``.

    Edges are detected on ``edited``; every pixel of the region that lies on
    an edge, inside a filled external contour, or inside its convex hull is
    treated as a leftover of the previous editing step and is rewritten:

    * if the corresponding ``original`` pixel is brighter than 220 and lies
      more than 200 columns into the region (absolute column > 520), it is
      set to 0;
    * otherwise the ``original`` pixel value is restored.

    Parameters:
        edited: image to repair; it is modified IN PLACE.
            Presumably single-channel 8-bit grayscale -- confirm.
        original: reference image with the same shape as ``edited``.

    Returns:
        The same ``edited`` array, after the in-place repair.
    """
    canny = cv2.Canny(edited, 100, 200, apertureSize=3, L2gradient=True)
    canny = canny[0:512, 320:]

    contours, _ = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    mask_filled = np.zeros_like(canny)
    cv2.drawContours(mask_filled, contours, -1, 255, thickness=cv2.FILLED)

    hull_list = [cv2.convexHull(cnt) for cnt in contours]
    cv2.drawContours(mask_filled, hull_list, -1, 255, thickness=cv2.FILLED)

    # Explicit boolean OR of the two maps instead of a uint8 sum, so the
    # result does not depend on integer wrap-around staying non-zero.
    flagged = (canny != 0) | (mask_filled != 0)

    # Both crops are views, so writes to img_crop land in `edited`.
    img_orig_crop = original[0:512, 320:]
    img_crop = edited[0:512, 320:]

    # Bright pixels far enough to the right are blanked; the column test is
    # broadcast over all rows of the region.
    columns = np.arange(flagged.shape[1])
    is_text = (img_orig_crop > 220) & (columns > 200)

    # Vectorised equivalent of visiting every flagged pixel one by one.
    restore = flagged & ~is_text
    img_crop[flagged & is_text] = 0
    img_crop[restore] = img_orig_crop[restore]

    return edited

# Pipeline di rimozione degli artefatti

In [5]:
# Image numbers listed in corrette.txt are skipped; every other image is processed.
nums = pd.read_csv('./data/corrette.txt')['1'].values
sel = [i for i in range(1, 323) if i not in nums]

# The output folders must already exist: OpenCV does not create them
# automatically.

# Per-image manual corrections, indexed by image name.
corrections = pd.read_csv('./data/correzioni.csv', index_col='img')

for i in sel:
    index = 'mdb{:03d}'.format(i)
    original = cv2.imread('data/ed/' + index + '.png', cv2.IMREAD_GRAYSCALE)
    out_path = 'data/text_removed/mdb{:03d}.png'.format(i)

    # Look up the manual correction for this image, if there is one.
    fix = corrections.loc[index] if index in corrections.index else None
    fix_type = None if fix is None else fix['type']

    if fix_type == 'b':
        # Type 'b': skip the automatic pipeline and just blank every column
        # from `col` onwards.
        original[:, fix['col']:] = 0
        result = original
    else:
        result = postProcessing(edited=removeTexts(original), original=original)
        if fix_type == 'a':
            # Type 'a': blank the columns after the automatic pipeline.
            result[:, fix['col']:] = 0

    cv2.imwrite(out_path, result)

## Casi particolari

In [6]:
# Special cases: images that need hand-tuned crops.

# mdb274: standard pipeline, then blank the rows from 920 down and the
# columns from 750 rightwards.
path = 'data/ed/mdb274.png'
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
ed = postProcessing(edited=removeTexts(img), original=img)
ed[920:, :] = 0
ed[:, 750:] = 0
cv2.imwrite('data/text_removed/mdb274.png', ed)

# mdb280: same treatment, with the bottom cut starting at row 900.
path = 'data/ed/mdb280.png'
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
ed = postProcessing(edited=removeTexts(img), original=img)
ed[900:, :] = 0
ed[:, 750:] = 0
cv2.imwrite('data/text_removed/mdb280.png', ed)

# mdb308: no automatic pipeline, only blank the top-right corner.
path = 'data/ed/mdb308.png'
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
img[0:250, 700:] = 0
# Save the image edited just above (`img`). Writing `ed` here would store
# the processed mdb280 image under the mdb308 file name.
cv2.imwrite('data/text_removed/mdb308.png', img)

True

## Rimozione di piccole strisce bianche

In [7]:
# Image numbers listed in corrette.txt are skipped, as in the main pipeline.
nums = pd.read_csv('./data/corrette.txt')['1'].values

# Image numbers excluded from this step.
# NOTE(review): presumably these images have no strip to remove -- confirm.
no_strip = [32, 50, 126, 133, 134, 141, 144, 173, 193, 216, 260, 279, 294, 300, 310, 320]

# Filter in one pass instead of list.remove(), which raises ValueError when
# an excluded number is already missing from the selection.
excluded = set(no_strip)
sel = [i for i in range(1, 323) if i not in nums and i not in excluded]

# Loop-invariant structuring element: bright regions that cannot contain a
# 33x33 ellipse are removed by the opening below.
strip_kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(33, 33))

for i in sel:
    # Each cleaned image is read and then overwritten in place on disk.
    path = 'data/text_removed/mdb{:03d}.png'.format(i)
    original = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # View on the top-right corner; writes to it modify `original`.
    crop = original[0:400, 700:]

    # Brighten by 100 and binarise at 170.
    contrast = cv2.convertScaleAbs(src=crop, alpha=1, beta=100)
    _, ret = cv2.threshold(contrast, 170, 255, cv2.THRESH_BINARY)

    # `opened` keeps only the large bright regions; subtracting it from the
    # binary image leaves the small ones, which become 0 in the inverted mask.
    # (Named `opened` so the builtin open() is not shadowed.)
    opened = cv2.morphologyEx(ret, cv2.MORPH_OPEN, kernel=strip_kernel)
    mask = 255 - cv2.subtract(ret, opened)

    # Blank the small bright regions in the image.
    crop[mask == 0] = 0
    cv2.imwrite(path, original)