<a href="https://colab.research.google.com/github/yumakemore/Cybersecurity-Syscall_Mimicry_Using_LSTM/blob/master/Cybersecurity_Syscall_Mimicry_using_LSTM_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cybersecurity-Syscall_Mimicry_Using_LSTM

Byunggu Yu, Ph.D.

May 2017

A deep LSTM (Long Short-Term Memory) neural network that learns to mimic system-call (syscall) sequences.



In [0]:
import numpy as np
import re	# Regix
import sys	
from scipy.special import expit as sigmoid
import matplotlib.pyplot as plt


#============= Global Constants and Variables ================
# --- Hyper-parameters ---
L = 2                 # how many stacked hidden layers
hlayer_size = 256     # neurons in each hidden layer
num_steps = 25        # length of one truncated-BPTT window
learning_rate = 0.01
init_scale = 0.1      # weights start uniform in [-init_scale, init_scale]

# --- Corpus / vocabulary state (assigned by setup()) ---
data = vocab_size = mode = word_to_idx = idx_to_word = None

# --- Readout-layer weight and bias (assigned by setup()) ---
wL = bL = None

# --- Per-hidden-layer slots, one entry per layer; each name gets its own list ---
W, H, B = ([None] * L for _ in range(3))

# --- Per-layer cell state / hidden output carried between training windows ---
last_c, last_h = ([None] * L for _ in range(2))

#=============================================================


In [0]:
def setup(infile):
  """Load the corpus, build the vocabulary and initialise the model.

  Reads `infile` as text, tokenises it at word level, and fills the
  module-level globals: data (token list), vocab_size, mode,
  word_to_idx / idx_to_word, the per-layer parameters W and B, the
  readout parameters wL and bL, and zeroed last_c / last_h for every
  hidden layer.

  Side effects: prints a corpus summary and blocks on input() until the
  user presses Enter. Terminates the process with exit status 1 if the
  file cannot be read or contains no tokens.

  Args:
    infile: path of the plain-text training file.
  """
  global data, vocab_size, mode, word_to_idx, idx_to_word
  global W, wL
  global B, bL
  global last_h, last_c

  #======= Read in data
  try:
    with open(infile, 'r') as corpus_file:
      data = corpus_file.read()
  except (OSError, UnicodeDecodeError) as err:
    print("Failed to read the input file: %s\n" % err)
    sys.exit(1)

  #======== Word-level processing
  # (For char-level processing, set mode="c" and take words from set(data)
  # on the raw string instead of splitting.)
  mode = "w"
  if mode == "w":
    # Pad every control character with spaces so it is split off from the
    # neighbouring word ...
    data = re.sub('([\\a\\b\\f\\n\\r\\t\\v])', r' \1 ', data)
    # ... then squeeze every run of 2+ whitespace characters into a single
    # space. The padded whitespace controls (\f \n \r \t \v) are part of
    # such runs and disappear here; \a and \b are not whitespace and stay
    # as tokens.
    data = re.sub(r'\s{2,}', ' ', data)

  # Drop empty tokens from the corpus itself, not only from the vocabulary:
  # an empty token left in `data` has no index and breaks the lookup later.
  data = [token for token in data.split(" ") if token]

  # Sorted so that token indices are the same on every run.
  words = sorted(set(data))

  data_size, vocab_size = len(data), len(words)
  if vocab_size == 0:
    print("The input file contains no tokens\n")
    sys.exit(1)

  print('data has %d words, %d unique.' % (data_size, vocab_size))
  input("press a key to start:\n")
  word_to_idx = {word: idx for idx, word in enumerate(words)}
  idx_to_word = {idx: word for idx, word in enumerate(words)}

  #======= w, u, and b of each hidden layer
  # Each W[k] holds the four gate blocks stacked row-wise and acts on the
  # concatenation [input from below ; previous hidden output]. Layer 0 takes
  # the one-hot token (vocab_size rows), deeper layers take the output of
  # the layer below (hlayer_size rows).
  W[0] = np.random.uniform(low=-init_scale, high=init_scale,
                           size=(hlayer_size*4, vocab_size+hlayer_size))
  B[0] = np.zeros((hlayer_size*4, 1))
  for k in range(1, L):
    W[k] = np.random.uniform(low=-init_scale, high=init_scale,
                             size=(hlayer_size*4, hlayer_size*2))
    B[k] = np.zeros((hlayer_size*4, 1))

  # Zero the carried state of every layer, layer 0 included.
  for k in range(L):
    last_c[k] = np.zeros((hlayer_size, 1))
    last_h[k] = np.zeros((hlayer_size, 1))

  #======= w and b of the readout layer
  wL = np.random.uniform(low=-init_scale, high=init_scale,
                         size=(vocab_size, hlayer_size))
  bL = np.zeros((vocab_size, 1))


In [0]:
def ForwardBackward(inputs, targets):
  """Run one truncated-BPTT window: forward pass, loss and gradients.

  The window is fed token by token through the L stacked LSTM layers and
  the softmax readout, starting from the state held in the globals
  last_c / last_h. Those globals are then replaced by the state reached
  at the end of the window (one (hidden, 1) array per layer).

  Args:
    inputs: sequence of vocabulary indices fed to the network.
    targets: sequence of vocabulary indices; targets[t] is the token
      expected after inputs[t]. Must be at least as long as inputs.

  Returns:
    (sumOfallC, dwL, dbL, dW, dB): cross-entropy summed over the window,
    followed by the gradients of wL, bL and, as per-layer lists, of W
    and B. Every gradient is clipped element-wise to [-5, 5].
  """
  global last_c, last_h

  T = len(inputs)
  x, y = {}, {}
  h, f, i, a, o = {}, {}, {}, {}, {}
  c = {}

  # Key -1 holds the state carried over from the previous window.
  h[-1] = [np.copy(last_h[l]) for l in range(L)]
  c[-1] = [np.copy(last_c[l]) for l in range(L)]

  sumOfallC = 0

  #=========== Feedforward Through Time
  for t in range(T):
    x[t] = np.zeros((vocab_size, 1))
    x[t][inputs[t]] = 1 # one-hot representation

    # h[t] has one extra slot: index -1 stores x[t], so that h[t][l-1] is
    # "the input from below" for every layer, including l == 0.
    h[t] = [None]*(L+1)
    h[t][-1] = x[t]
    f[t] = [None]*L
    i[t] = [None]*L
    a[t] = [None]*L
    o[t] = [None]*L
    c[t] = [None]*L

    #--- Hidden Layers ---
    for l in range(L):
      H = np.concatenate((h[t][l-1], h[t-1][l]), axis=0)
      z = np.dot(W[l], H) + B[l]

      # Rows of z are the four gate pre-activations stacked: f, i, a, o.
      unit = z.shape[0] // 4
      f[t][l] = sigmoid(z[0:unit])
      i[t][l] = sigmoid(z[unit:2*unit])
      a[t][l] = np.tanh(z[2*unit:3*unit])
      o[t][l] = sigmoid(z[3*unit:4*unit])

      c[t][l] = f[t][l]*c[t-1][l] + i[t][l]*a[t][l]
      h[t][l] = o[t][l]*np.tanh(c[t][l])

    #--- Readout Layer ---
    zL = np.dot(wL, h[t][L-1]) + bL
    # Shift by the max before exponentiating: same softmax, no overflow.
    expz = np.exp(zL - np.max(zL))
    y[t] = expz / np.sum(expz)
    sumOfallC += -np.log(y[t][targets[t], 0]) # cross entropy against the one-hot target

  #--- store the last c and h for the next step sequence
  # Only the L per-layer arrays are kept; the one-hot input stored in the
  # extra slot of h[t] is not part of the recurrent state.
  last_c = [np.copy(c[T-1][l]) for l in range(L)]
  last_h = [np.copy(h[T-1][l]) for l in range(L)]

  #=========== Truncated Backpropagation Through Time
  dwL = np.zeros_like(wL)
  dbL = np.zeros_like(bL)
  dW = [np.zeros_like(W[k]) for k in range(L)]
  dB = [np.zeros_like(B[k]) for k in range(L)]

  # Gradients arriving from time step t+1 into each layer's hidden output
  # and cell state; zero beyond the end of the window.
  dh_next = [np.zeros_like(c[-1][l]) for l in range(L)]
  dc_next = [np.zeros_like(c[-1][l]) for l in range(L)]

  for t in reversed(range(T)):
    dy = np.copy(y[t])
    dy[targets[t]] -= 1 # (y - target) for softmax + cross entropy

    dbL += dy
    dwL += np.dot(dy, h[t][L-1].T)

    # Every time step contributes readout gradient to the top layer.
    d_from_above = np.dot(wL.T, dy)

    for l in reversed(range(L)):
      # Total gradient on h[t][l]: from the layer above (or the readout)
      # plus from the same layer at t+1.
      dh = d_from_above + dh_next[l]

      tanh_c = np.tanh(c[t][l])
      do = dh*tanh_c
      # h = o*tanh(c), so the path through tanh carries the factor o;
      # dc_next adds the gradient flowing back along c[t+1] = f*c[t] + ...
      dc = dh*o[t][l]*(1 - tanh_c*tanh_c) + dc_next[l]
      da = dc*i[t][l]
      di = dc*a[t][l]
      df = dc*c[t-1][l]

      dzf = df*f[t][l]*(1 - f[t][l])
      dzi = di*i[t][l]*(1 - i[t][l])
      dza = da*(1 - a[t][l]*a[t][l])
      dzo = do*o[t][l]*(1 - o[t][l])

      dz = np.concatenate((dzf, dzi, dza, dzo), axis=0)
      H = np.concatenate((h[t][l-1], h[t-1][l]), axis=0)
      dB[l] += dz
      dW[l] += np.dot(dz, H.T)
      dH = np.dot(W[l].T, dz)

      # Split dH where the two concatenated parts of H meet: the first
      # rows belong to the input from below, the rest to h[t-1][l].
      n_below = h[t][l-1].shape[0]
      d_from_above = dH[0:n_below]   # for layer l-1 (unused when l == 0)
      dh_next[l] = dH[n_below:]
      dc_next[l] = dc*f[t][l]

  #--- Clipping
  for dparam in [dwL, dbL]:
    np.clip(dparam, -5, 5, out=dparam)
  for dparam in dW:
    np.clip(dparam, -5, 5, out=dparam)
  for dparam in dB:
    np.clip(dparam, -5, 5, out=dparam)

  return sumOfallC, dwL, dbL, dW, dB

In [0]:
def creation(beginning_idx, n):
  """Generate n tokens by running the network on its own output.

  Starts from copies of the carried state last_c / last_h and feeds
  `beginning_idx` as the first input. The first generated token is the
  most probable one; later tokens are sampled from the softmax output.
  The generated sequence is not part of the training data, so the
  globals last_c and last_h are left untouched.

  Args:
    beginning_idx: vocabulary index of the seed token.
    n: number of tokens to generate.

  Returns:
    List of n+1 vocabulary indices: the seed followed by the generated
    tokens.
  """
  # Private per-layer copies so that generation cannot disturb training state.
  c = [np.copy(last_c[l]) for l in range(L)]
  h = [np.copy(last_h[l]) for l in range(L)]

  layer_input = np.zeros((vocab_size, 1))
  layer_input[beginning_idx] = 1

  idxes = [beginning_idx]
  for t in range(n):
    for l in range(L):
      H = np.concatenate((layer_input, h[l]), axis=0)
      z = np.dot(W[l], H) + B[l]

      # Rows of z are the four gate pre-activations stacked: f, i, a, o.
      unit = z.shape[0] // 4
      f = sigmoid(z[0:unit])
      i = sigmoid(z[unit:2*unit])
      a = np.tanh(z[2*unit:3*unit])
      o = sigmoid(z[3*unit:4*unit])

      # Same cell update as the training forward pass: the old state
      # enters once, scaled by the forget gate.
      c[l] = f*c[l] + i*a
      h[l] = o*np.tanh(c[l])
      layer_input = np.copy(h[l])

    z = np.dot(wL, h[L-1]) + bL
    # Shift by the max before exponentiating: same softmax, no overflow.
    expz = np.exp(z - np.max(z))
    y = expz / np.sum(expz)

    y_ravel = y.ravel()
    if t == 0:
      idx = np.argmax(y_ravel)
    else:
      idx = np.random.choice(vocab_size, p=y_ravel)

    # The chosen token becomes the next one-hot input.
    layer_input = np.zeros((vocab_size, 1))
    layer_input[idx] = 1
    idxes.append(idx)

  return idxes

In [0]:
#==========================================================================#
def _draw_perplexity_plot(steps, raw, smoothed, x_max, y_max):
  """Redraw the raw and smoothed perplexity curves on the current figure."""
  plt.clf()
  plt.axis([0, x_max, 0, y_max])
  plt.plot(steps, raw, label="Raw")
  plt.plot(steps, smoothed, label="Exp Avg")
  plt.title("Perplexity")
  plt.xlabel('Steps')
  plt.legend(loc='upper right')
  plt.pause(0.0001)
  plt.draw()


if __name__ == "__main__":
#==========================================================================#
  # Interactive training driver: asks for a corpus file, then trains
  # forever (stop it with an interrupt), printing progress every 10 steps
  # and a generated sample plus a refreshed plot every 100 steps.

  #=== Pyplot variables
  plt.figure("Syscall Test")
  plot_x = []    # step number of each recorded point
  plot_y1 = []   # perplexity of the current window
  plot_y2 = []   # perplexity of the exponentially averaged loss
  #====================

  print("This program properly runs on Python 3.x\n")
  input_f = input("Give a data file in a plain text file (e.g., ./test-fixed.txt): \n")
  setup(input_f)

  # One window needs num_steps inputs plus one more token for the last target.
  if len(data) <= num_steps:
    print("Input has only %d tokens; at least %d are needed for one training window\n"
          % (len(data), num_steps + 1))
    sys.exit(1)

  num_of_steps = 0
  data_read_point = 0

  #---- Adagrad accumulators (running sum of squared gradients) ----
  mwL = np.zeros_like(wL)
  mbL = np.zeros_like(bL)
  mW = [np.zeros_like(W[k]) for k in range(L)]
  mB = [np.zeros_like(B[k]) for k in range(L)]
  #------------------------------------------------------------------

  ea_C = -np.log(1.0/vocab_size) # loss of a uniform guess, used as the starting average
  print('Beginning C and perplexity: %f, %f' % (ea_C, np.exp(ea_C)))

  epoch = -1
  for l in range(0, L):
    last_h[l] = np.zeros((hlayer_size, 1)) # reset memory
    last_c[l] = np.zeros((hlayer_size, 1)) # reset memory

  while True:
    # Wrap around to the start of the data when the next window would run
    # past the end (and on the very first iteration). The carried LSTM
    # state is deliberately not reset at the wrap.
    if data_read_point+num_steps >= len(data) or num_of_steps == 0:
      if epoch >= 0:
        _draw_perplexity_plot(plot_x, plot_y1, plot_y2, num_of_steps, vocab_size)
      epoch += 1
      data_read_point = 0

    # X is the input window, Y the same window shifted one token ahead.
    try:
      X = [word_to_idx[word] for word in data[data_read_point:data_read_point+num_steps]]
      Y = [word_to_idx[word] for word in data[data_read_point+1:data_read_point+(num_steps+1)]]
    except KeyError:
      # A token in the data has no vocabulary index.
      print("Data Read Error at data_read_point=%d and num_steps=%d\n" %(data_read_point, num_steps))
      sys.exit(1)

    #= One Step: Sequential feedforwarding of X one by one and one BPTT at the end of the step
    sumOfallC, dwL, dbL, dW, dB = ForwardBackward(X, Y)

    #= Convergence Report
    ea_C = ea_C * 0.999 + (sumOfallC/len(Y)) * 0.001

    #==== Pyplot Plotting Data
    plot_x.append(num_of_steps-1)
    plot_y1.append(np.exp(sumOfallC/len(Y)))
    plot_y2.append(np.exp(ea_C))

    if num_of_steps % 10 == 0 and num_of_steps > 0:
      print('Epoch %d, Step %d: C, perplexity= %8.4f, %8.2f ==> Exp. Avg: %8.4f, %8.2f' %
            (epoch, num_of_steps, sumOfallC/len(Y), np.exp(sumOfallC/len(Y)), ea_C, np.exp(ea_C)))

      if num_of_steps % 100 == 0:
        _draw_perplexity_plot(plot_x, plot_y1, plot_y2, num_of_steps, vocab_size)

    #=============== Creation =============
    if num_of_steps % 100 == 0:
      creation_idx = creation(Y[-1], 5)
      # NOTE(review): the "Given" context shows X[-2] while generation is
      # seeded with Y[-1], so X[-1] never appears in the printout --
      # confirm whether X[-1:] was intended.
      if mode=="c":
        created = "Given: [..."
        created += ''.join(idx_to_word[idx] for idx in X[-2:-1])
        created += ("]")
        created += ''.join(idx_to_word[idx] for idx in creation_idx)
      elif mode=="w":
        created = "Given: [... "
        created += ' '.join(idx_to_word[idx] for idx in X[-2:-1])
        created += ("] ")
        created += ' '.join(idx_to_word[idx] for idx in creation_idx)
      else:
        print ("mode value is incorrect\n")
        sys.exit(1)
      print("\n--My Mimicry--")
      print("%s" % created)
      print("---------------\n")
    #========================================

    #================== Adjust w, u, and b ====================
    # Both loops update the arrays in place, so the module-level
    # parameters and the accumulators above are the objects being changed.
    for param, dparam, mem in zip([wL, bL],[dwL, dbL],[mwL, mbL]):
      mem += dparam * dparam
      param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

    for param, dparam, mem in zip([W, B],[dW, dB],[mW, mB]):
      for k in range(0,L):
        mem[k] += dparam[k] * dparam[k]
        param[k] += -learning_rate * dparam[k] / np.sqrt(mem[k] + 1e-8) # adagrad update
    #===========================================================

    # Slide the window by one token (use num_steps instead for
    # non-overlapping windows).
    data_read_point += 1
    num_of_steps += 1
    



In [0]:
  # Summary plot: draws the recorded perplexity history (plot_x, plot_y1,
  # plot_y2, as appended by the training loop) on a log-scaled y axis,
  # limited to the first 40000 recorded points, and shows it with plt.show().
  # NOTE(review): the notebook output recorded after this cell is a
  # NameError -- presumably it was run in a session where these names were
  # not defined; confirm before relying on it.
  #=== Pyplot variables
  plt.figure("deepSRN.py -- Byunggu Yu")
  #====================

  #==== Pyplot Plotting
  plt.clf()
  #plt.axis([0,num_of_steps,0,vocab_size])
  plt.yscale('log')
  plt.plot(plot_x[0:40000], plot_y1[0:40000], label="Raw")
  plt.plot(plot_x[0:40000], plot_y2[0:40000], label="Exp Avg")
  plt.title("Perplexity")
  plt.xlabel('Steps')
  plt.legend(loc='upper right')
  #plt.pause(0.0001)
  plt.show()
  #plt.draw()
  #====================

NameError: ignored