In [None]:
# !pip install pycuda

Collecting pycuda
  Downloading pycuda-2021.1.tar.gz (1.7 MB)
[?25l[K     |▏                               | 10 kB 29.3 MB/s eta 0:00:01[K     |▍                               | 20 kB 35.0 MB/s eta 0:00:01[K     |▋                               | 30 kB 40.1 MB/s eta 0:00:01[K     |▉                               | 40 kB 30.2 MB/s eta 0:00:01[K     |█                               | 51 kB 18.1 MB/s eta 0:00:01[K     |█▏                              | 61 kB 15.5 MB/s eta 0:00:01[K     |█▍                              | 71 kB 14.4 MB/s eta 0:00:01[K     |█▋                              | 81 kB 15.8 MB/s eta 0:00:01[K     |█▊                              | 92 kB 14.7 MB/s eta 0:00:01[K     |██                              | 102 kB 15.6 MB/s eta 0:00:01[K     |██▏                             | 112 kB 15.6 MB/s eta 0:00:01[K     |██▍                             | 122 kB 15.6 MB/s eta 0:00:01[K     |██▌                             | 133 kB 15.6 MB/s eta 0:00:01[K 

In [5]:
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
import numpy as np
import pycuda.gpuarray as gpuarray 
import sys
import os
import time

In [6]:
# from google.colab import drive
# drive.mount("/content/drive/")

# path = "/content/drive/My Drive"
# sys.path.append(path)
# os.chdir(path)
# %cd CUDA_OpenCL/

In [17]:
class cudaCipher:
    def __init__(self):
        """
        Attributes for instance of deviceAdd module
        Includes kernel code and input variables.
        """
        # If you are using any helper function to make 
        # blocksize or gridsize calculations, you may define them
        # here as lambda functions. 
        # Quick lambda function to calculate grid dimensions
        self.cal_grid_dim = lambda x, blockX: int(np.ceil(x / blockX))
        # define block and grid dimensions
        #
        self.block = (1024, 1, 1)
        
        # kernel code wrapper
        #
        kernelwrapper = """
        __global__ void de_cipher(char* out, char* inp, const int n){
            int idx = threadIdx.x + blockIdx.x * blockDim.x;
            if(idx < n){
                char cur = inp[idx];
                if(cur - 'a' >= 0 && cur - 'a' <= 12)
                  out[idx] = cur + 13;
                else if(cur - 'n' >= 0 && cur - 'n' <= 12)
                  out[idx] = cur - 13;
                else
                  out[idx] = cur;
            }
              
            // __syncthreads();
        }
        """
        self.kernel = SourceModule(kernelwrapper)
        # Compile the kernel code when an instance
        # of this class is made.

    
    def devCipher(self, sentence):
        """
        Function to perform on-device parallel ROT-13 encrypt/decrypt
        by explicitly allocating device memory for host variables using
        gpuarray.
        Returns
            out                             :   encrypted/decrypted result
            time_ :   execution time in milliseconds
        """
        # Get kernel function
        decipher_func = self.kernel.get_function("de_cipher")
        # Device memory allocation for input and output array(s)
        start = cuda.Event()
        end = cuda.Event()
        length = len(sentence)
        # size = length*sys.getsizeof(sentence)

        sentence = np.char.asarray(sentence) # "good morning" -> np.ndarray('good morning')
        size = sentence.nbytes


        start.record()
        sentence_gpu = gpuarray.to_gpu(sentence)
        output = np.empty_like(sentence)
        output_gpu = gpuarray.to_gpu(output)
        grid = (self.cal_grid_dim(size, self.block[0]), 1, 1)
        # Record execution time and execute operation.
        event = decipher_func(output_gpu, sentence_gpu, np.int32(size), block=self.block, 
                    grid=grid)
        # Wait for the event to complete
        end.record()
        end.synchronize()
        # Fetch result from device to host
        output_cpu = output_gpu.get()
        # Convert output array back to string
        decrypted = str(output_cpu[0])
        time_ = start.time_till(end)
        return decrypted, time_

    
    def pyCipher(self, sentence):
        """
        Function to perform parallel ROT-13 encrypt/decrypt using 
        vanilla python.

        Returns
            decrypted                       :   encrypted/decrypted result
            time_         :   execution time in milliseconds
        """
        decrypted = ""
        start = time.time()
        for char in sentence:
            if 0 <= ord(char) - ord('n') <= 12:
                decrypted += chr(ord(char) - 13)
            elif 0 <= ord(char) - ord('a') <= 12:
                decrypted += chr(ord(char) + 13)
            else:
                decrypted += char
        end = time.time()
        time_ = end - start
        return decrypted, time_

In [19]:
if __name__ == "__main__":
    # Main code

    # Open text file to be deciphered.
    # Preprocess the file to separate sentences
    with open("deciphertext.txt", 'r') as f:
        contents = f.read().split(".")
    f.close()
    # Split string into list populated with '.' as delimiter.

    # Empty lists to hold deciphered sentences, execution times
    full_deciphered_text_dev = ""
    time_devs = []
    full_deciphered_text_py = ""
    time_pys = []
    instance = cudaCipher()
    # Loop over each sentence in the list
    for sentence in contents:
        decrypted_dev, time_dev = instance.devCipher(sentence)
        decrypted_py, time_py = instance.pyCipher(sentence)
    # post process the string(s) if required
        full_deciphered_text_dev += decrypted_dev + "."
        full_deciphered_text_py += decrypted_py + '.'
        time_devs.append(time_dev * 1000)
        time_pys.append(time_py * 1000)
    full_deciphered_text_dev = full_deciphered_text_dev.strip('.')
    full_deciphered_text_py = full_deciphered_text_py.strip('.')
    tc = sum(time_devs) / len(time_devs)
    tp = sum(time_pys) / len(time_pys)
    # Execution time
    print("CUDA output cracked in ", tc, " milliseconds per sentence.")
    print("Python output cracked in ", tp, " milliseconds per sentence.")
    equal = True
    # Error check
    try:
        print("Checkpoint: Do python and kernel decryption match? Checking...")
        assert full_deciphered_text_dev == full_deciphered_text_py
        
    except AssertionError:
        print("Checkpoint failed: Python and CUDA kernel decryption do not match. Try Again!")
        # dump bad output to file for debugging
        equal = False
        raise AssertionError
        

    # If ciphers agree, proceed to write decrypted text to file
    # and plot execution times
    
    if equal:
        print("Checkpoint passed!")
        print("Writing decrypted text to file...")

        # Write cuda output to file
        with open("deciphered_cuda.txt", 'w') as g:
          g.write(full_deciphered_text_dev)
        g.close()
        # Dot plot the  per-sentence execution times
        plt.figure()
        plt.plot(time_devs, linewidth=2, color="red", label="opencl decrypted")
        plt.plot(time_pys, linewidth=2, color="blue", label="python decrypted")
        plt.xlabel("sentence index")
        plt.ylabel("time in milliseconds")
        plt.title("decipher time comparison")
        plt.legend()
        plt.grid()
        # Scatter plot the  per-sentence execution times 
        plt.show()  

CUDA output cracked in  474.7272729873657  milliseconds per sentence.
Python output cracked in  0.08515878157182173  milliseconds per sentence.
Checkpoint: Do python and kernel decryption match? Checking...
Checkpoint passed!
Writing decrypted text to file...


NameError: name 'plt' is not defined