In [56]:
import glob
import os
import numpy as np
import pandas as pd
import pydicom
import re

In [25]:
class Simple_encryptor:
    def __init__(self, filename):
        self.reorder = pd.read_csv(filename+'_reorder.csv').to_numpy()
        self.mapping_string = pd.read_csv(filename+'_mapping_string.csv').to_numpy()
        self.mapping_file = pd.read_csv(filename+'_mapping_filename.csv').to_numpy()
        
        # convert the reorder contents to int
        for col in range(1,3):
            for row in range(self.reorder.shape[0]):
                self.reorder[row][col] = np.array([int(k) for k in self.reorder[row][col].split(',')])
    
        self.dicom_erase_field = ['ReferringPhysicianName', 
                                  'PerformingPhysicianName', 
                                  'NameOfPhysiciansReadingStudy', 
                                  'OperatorsName', 
                                  'PatientSex', 
                                  'PatientBirthDate', 
                                  'PatientAge']
        self.dicom_remove_field = ['InstitutionalDepartmentName',
                                   'MilitaryRank', 
                                   'StationName', 
                                   'BranchOfService', 
                                   'AdditionalPatientHistory']
        self.dicom_encrypt_field = ['AccessionNumber', 
                                    'PatientName',
                                    'InstitutionName', 
                                    'PatientID']
    
    def Encrypt_string(self, string, mapping = None):
        if mapping is None:
            mapping = self.mapping_string
        m = mapping
        
        string = str(string)
        string = np.array(list(string))[self.reorder[len(string)-1][1]]
        string = ''.join([m[np.where(m[:, 0] == s)[0][0]][1] if s in m[:, 0] else s for s in string])
        
        return string
    
    def Decrypt_string(self, string, mapping = None):
        if mapping is None:
            mapping = self.mapping_string
        m = mapping
        
        string = str(string)
        string = np.array(list(string))[self.reorder[len(string)-1][2]]
        string = ''.join([m[np.where(m[:, 1] == s)[0][0]][0] if s in m[:, 1] else s for s in string])
        
        return string
    
    def Encrypt_filename(self, filename, level = 1):
        '''
        @param: level: for a path, how many directories to be encrypted from the farest one
        '''
        m = self.mapping_file
        
        basenames = []
        for i in range(level):
            basenames.append(self.Encrypt_string(os.path.basename(filename).lower(), self.mapping_file))
            filename = os.path.dirname(filename)
        
        return os.path.join(*([filename] + basenames[::-1]))
    
    def Decrypt_filename(self, filename, level = 1):
        '''
        @param: level: for a path, how many directories to be encrypted from the farest one
        '''
        m = self.mapping_file
        
        basenames = []
        for i in range(level):
            basenames.append(self.Decrypt_string(os.path.basename(filename).lower(), self.mapping_file))
            filename = os.path.dirname(filename)
        
        return os.path.join(*([filename] + basenames[::-1]))
    
    def Encrypt_dicom(self, dcm):
        for field in self.dicom_erase_field:
            if field in dcm:
                setattr(dcm, field, ' ')
        
        for field in self.dicom_remove_field:
            if field in dcm:
                delattr(dcm, field)
        
        for field in self.dicom_encrypt_field:
            if field in dcm:
                # use lower only encryption, because usually the folder will be named with mrn and acc. This will avoid potential conflict in Windows
                string = str(getattr(dcm, field)).lower()
                setattr(dcm, field, self.Encrypt_string(string, self.mapping_file))
        
        return dcm
    
    def Get_folder_name(self, dcm):
        return '_'.join([dcm.PatientID, dcm.AccessionNumber])

In [127]:
# simple encryptor, convert 0-9, a-z, A-Z to their corresponding ascii hex.
# A bias between 00 to FF is added to each char and then mod 256. The bias changes at different positions
# A encrypted string begins and ends with M and end with N.
# If the enclosed string begins with K (embeded key), then the next two letters are the key of the encryption;
# If the enclosed string begins with L (hidden key), then the next two letters are just the length of string.
class ascii_encryptor:
    def __init__(self, key = 0):
        self.key = key
        
        self.dicom_erase_field = ['ReferringPhysicianName', 
                                  'PerformingPhysicianName', 
                                  'NameOfPhysiciansReadingStudy', 
                                  'OperatorsName', 
                                  'PatientSex', 
                                  'PatientBirthDate', 
                                  'PatientAge']
        self.dicom_remove_field = ['InstitutionalDepartmentName',
                                   'MilitaryRank', 
                                   'StationName', 
                                   'BranchOfService', 
                                   'AdditionalPatientHistory']
        self.dicom_encrypt_field = ['AccessionNumber', 
                                    'PatientName',
                                    'InstitutionName', 
                                    'PatientID']
    
    def encode_string(self, string, mode = 'K'):
        key = self.key % 256
        bias = np.arange(len(string)) - (key * len(string)) % 256
        encrypt_array = ['0x{:02x}'.format((ord(string[i]) + bias[i]) % 256)[2:].upper() for i in range(len(string))]
        if mode == 'K':
            encrypt_array = ['M', 'K', '0x{:02x}'.format(key)[2:].upper()] + encrypt_array + ['N']
        else:
            encrypt_array = ['M', 'L', '0x{:02x}'.format(len(encrypt_array) % 256)[2:].upper()] + encrypt_array + ['N']
        
        return ''.join(encrypt_array)
    
    def __decode_single_string(self, string):
        if string[0] == 'K':
            key = int(string[1:3], 16)
        else:
            key = self.key
        
        string = string[3:]
        bias = np.arange(len(string) // 2) - (key * len(string) // 2) % 256
        dec_array = [chr((int(string[i:i+2], 16) - bias[i//2])%256) for i in range(0, len(string), 2)]
        
        return ''.join(dec_array)
    
    def decode_string(self, string):
        tokens = re.findall('M[KL][0-9A-F]*N', string)
        dec_tokens = [self.__decode_single_string(t[1:-1]) for t in tokens]
        for t,d in zip(tokens, dec_tokens):
            string = string.replace(t,d)
        
        return string
    
    def encrypt_dicom(self, dcm, mode = 'K'):
        for field in self.dicom_erase_field:
            if field in dcm:
                setattr(dcm, field, ' ')
        
        for field in self.dicom_remove_field:
            if field in dcm:
                delattr(dcm, field)
        
        for field in self.dicom_encrypt_field:
            if field in dcm:
                # use lower only encryption, because usually the folder will be named with mrn and acc. This will avoid potential conflict in Windows
                string = str(getattr(dcm, field)).lower()
                setattr(dcm, field, self.encode_string(string, mode))
        
        return dcm

In [3]:
# generate two mapping charts: reorder and remapping (0-9, space and a-z. Everything is converted to lowercase)
# this is not a strong encryption, but should be enough for deidentification purpose

In [4]:
if __name__ == '__main__':
    filename = 'aneurysm2'
    np.random.seed(int.from_bytes(bytes(filename, 'utf-8'), 'little') % 2**32)
    
    # generate reorder chart
    records = []
    for i in range(255):
        forward = np.arange(i+1)
        np.random.shuffle(forward)
        backward = np.argsort(forward)
        
        fstr = ','.join([str(k) for k in forward])
        bstr = ','.join([str(k) for k in backward])
        
        records.append([i+1, fstr, bstr])
    pd.DataFrame(records, columns=['length', 'forward', 'backward']).to_csv(filename+'_reorder.csv', index=False)
    
    # generate character re-mapping chart, full
    src_chars = [chr(k) for k in list(range(48, 58)) + list(range(65,91)) + list(range(97,123))]
    dst_chars = np.copy(src_chars)
    np.random.shuffle(dst_chars)
    pd.DataFrame({'source': src_chars, 'target': dst_chars}).to_csv(filename+'_mapping_string.csv', index=False)
    
    src_chars = [chr(k) for k in list(range(48, 58)) + list(range(97,123))]
    dst_chars = np.copy(src_chars)
    np.random.shuffle(dst_chars)
    pd.DataFrame({'source': src_chars, 'target': dst_chars}).to_csv(filename+'_mapping_filename.csv', index=False)

In [9]:
if __name__ == '__main__':
    filename = 'aneurysm2'
    f = Simple_encryptor(filename)
    
    s1 = f.Encrypt_string('MICKEY')
    s2 = f.Decrypt_string(s1)
    print (s1, s2)
    
    s1 = f.Encrypt_filename('/home/dwu/data/DZH_DATA/2018/Aneurysm/SAH-/1225515/9272310 - CT - NEURO - CTANGH-3D', 2)
    s2 = f.Decrypt_filename(s1, 2)
    print (s1)
    print (s2)
    
    dcm_files = glob.glob('/home/dwu/data/DZH_DATA/2018/Aneurysm/SAH-/1225515/*/*/*.dcm')
    img = pydicom.dcmread(dcm_files[0], force=True)
    img_enc = f.Encrypt_dicom(img)

Fg3If0 MICKEY
/home/dwu/data/DZH_DATA/2018/Aneurysm/SAH-/s99ssww/i_-m95brw_3yqj2c_6i2qq0q_5uqpquw
/home/dwu/data/DZH_DATA/2018/Aneurysm/SAH-/1225515/9272310 - ct - neuro - ctangh-3d


IndexError: list index out of range

In [155]:
if __name__ == '__main__':
    import subprocess
    subprocess.check_call(['jupyter', 'nbconvert', '--to', 'script', 'encrpytion'])