In [44]:
import math
import time
from collections import OrderedDict

import numpy as np
import pandas as pd
import scipy
from gym import error, spaces, utils
from joblib import dump, load

Because the feature space has 330 dimensions, I'm going to work with only a subset of them: specifically, the most significant permissions for both benign and malicious apps.

In [32]:
np.random.seed(2)  # fix the global NumPy RNG so action sampling is reproducible

# Permissions the agent is allowed to toggle; they are also the Q-table columns.
permissions_names = [
    'android.permission.INTERNET',
    'android.permission.READ_PHONE_STATE',
    'android.permission.ACCESS_NETWORK_STATE',
    'android.permission.WRITE_EXTERNAL_STORAGE',
    'android.permission.ACCESS_WIFI_STATE',
    'android.permission.READ_SMS',
    'android.permission.WRITE_SMS',
    'android.permission.RECEIVE_BOOT_COMPLETED',
]
# N_STATES must be defined before ACTIONS, which is derived from it.
N_STATES = len(permissions_names)  # the length of the dimensional world, the number of permissions
# Row count handed to build_q_table: N_STATES! (40320 for the 8 permissions above).
# The stdlib math.factorial replaces the undocumented scipy.math alias.
ACTIONS = math.factorial(N_STATES)
EPSILON = 0.9  # greedy factor: probability of taking the greedy action
ALPHA = 0.1  # learning rate
GAMMA = 0.9  # discount factor
MAX_EPISODES = 13  # maximum episodes
FRESH_TIME = 0.3  # fresh time for one move
# Load the full permissions dataset: a ';'-separated CSV whose first row is the header.
complete_permissions = pd.read_csv(
    '../../datasets/android_permissions.csv',
    sep=';',
    header=0,
)

In [36]:
# Turn the loaded dataset into a one-row, all-zero template that keeps the
# original column labels. Built directly instead of emptying the frame in
# place and using DataFrame.append, which no longer exists in pandas >= 2.0.
# Rebinding (rather than mutating in place) also keeps the cell safe to re-run.
complete_permissions = pd.DataFrame(
    np.zeros((1, len(complete_permissions.columns))),
    columns=complete_permissions.columns,
)
complete_permissions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Columns: 331 entries, android to type
dtypes: float64(331)
memory usage: 2.7 KB


In [37]:
# Remove the two columns that are not permission flags in a single call.
# Presumably 'type' is the class label and 'android' a non-permission field — TODO confirm.
non_feature_columns = ['type', 'android']
complete_permissions = complete_permissions.drop(columns=non_feature_columns)

In [38]:
# Confirm the two non-permission columns are gone; only permission columns should remain.
complete_permissions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Columns: 329 entries, android.app.cts.permission.TEST_GRANTED to test_permission
dtypes: float64(329)
memory usage: 2.6 KB


In [18]:
def build_q_table(n_states, actions, columns=None):
    """Create a zero-initialised Q-table.

    Parameters
    ----------
    n_states : int
        Number of columns in the table.
    actions : int
        Number of rows in the table.
    columns : sequence of str, optional
        Column labels. Defaults to the notebook-level ``permissions_names``
        so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        Frame of shape ``(actions, n_states)`` filled with ``0.0``.

    Raises
    ------
    ValueError
        If the number of column labels differs from ``n_states``.
    """
    if columns is None:
        columns = permissions_names
    columns = list(columns)
    # Fail with a clear message instead of pandas' generic shape-mismatch error.
    if len(columns) != n_states:
        raise ValueError(
            'n_states (%s) does not match the number of column labels (%s)'
            % (n_states, len(columns))
        )
    return pd.DataFrame(
        np.zeros((actions, n_states)),  # q_table initial values
        columns=columns,  # permissions
    )

# Sanity check: build a throwaway Q-table and print its summary (rows, columns, dtypes, memory).
build_q_table(N_STATES, ACTIONS).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40320 entries, 0 to 40319
Data columns (total 8 columns):
android.permission.INTERNET                  40320 non-null float64
android.permission.READ_PHONE_STATE          40320 non-null float64
android.permission.ACCESS_NETWORK_STATE      40320 non-null float64
android.permission.WRITE_EXTERNAL_STORAGE    40320 non-null float64
android.permission.ACCESS_WIFI_STATE         40320 non-null float64
android.permission.READ_SMS                  40320 non-null float64
android.permission.WRITE_SMS                 40320 non-null float64
android.permission.RECEIVE_BOOT_COMPLETED    40320 non-null float64
dtypes: float64(8)
memory usage: 2.5 MB


In [31]:
def choose_action(state, q_table):
    """Pick the permission to toggle for ``state``.

    One uniform random number is always drawn first. The greedy choice (the
    column with the largest value in row ``state``) is taken only when that
    draw does not exceed ``EPSILON`` and the row holds at least one non-zero
    value; otherwise a permission is sampled uniformly from
    ``permissions_names``.

    Returns the name of the chosen permission.
    """
    row_values = q_table.iloc[state, :]
    explore = np.random.uniform() > EPSILON
    if not explore and not (row_values == 0).all():
        # Exploit: idxmax (not argmax) returns the column label in current pandas.
        return row_values.idxmax()
    # Explore, or nothing has been learned for this state yet.
    return np.random.choice(permissions_names)

# Smoke test: query row 4 of a randomly filled table. Because the row is random,
# the all-zero fallback is practically never hit, so the result is the greedy
# pick unless the exploration draw exceeds EPSILON.
#choose_action(1, build_q_table(N_STATES, ACTIONS))
choose_action(4, pd.DataFrame(
    np.random.rand(ACTIONS, (N_STATES)), # q_table initial values
    columns = permissions_names, # action's name
    ))

'android.permission.INTERNET'

Let's create the environment that interacts with the defense model. First, I'm going to load the model previously trained in the <a href="data analysis - Android.ipynb">Android notebook</a>.

In [11]:
# Load the previously trained defense classifier from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
clf = load('defense.joblib') 

In [70]:
def get_env_feedback(S, A):
    """Ask the defense model about ``A`` and derive the next state and reward.

    Parameters
    ----------
    S : int
        Current state index.
    A : object accepted by ``clf.predict``
        The permission vector to classify (the callers pass a one-row DataFrame).

    Returns
    -------
    tuple
        ``(S_, R)``. When the prediction equals 1 the agent advances one state
        with reward 1, or reaches ``'terminal'`` with reward 2 if it was at
        state ``N_STATES - 2``. Otherwise the reward is 0 and the agent steps
        back one state (staying put at state 0).

    NOTE(review): a prediction of 1 is treated here as "detected as benign",
    but the final cell of the notebook prints "Malware developed" for the same
    value — confirm the classifier's label encoding.
    """
    if clf.predict(A) == 1:  # it is detected as benign
        if S == N_STATES - 2:
            return 'terminal', 2
        return S + 1, 1
    # Not accepted by the defense: no reward, fall back one step but never below 0.
    previous_state = S if S == 0 else S - 1
    return previous_state, 0
    
# Smoke test from state 1 using the single-row permissions template.
get_env_feedback(1, complete_permissions)

(0, 0)

In [78]:
def update_env(S, episode, step_counter):
    """Redraw the one-line text view of the environment.

    For the ``'terminal'`` state an episode summary is printed, held for two
    seconds and then blanked out. For any other state a track of ``N_STATES``
    cells ending in ``'D'`` is printed with the agent's cell marked ``'M'``
    when it is at ``N_STATES - 2`` and ``'B'`` elsewhere, followed by a pause
    of ``FRESH_TIME`` seconds.
    """
    if S == 'terminal':
        summary = 'Episode %s: total_steps = %s' % (episode+1, step_counter)
        print('\r{}'.format(summary), end='')
        time.sleep(2)
        print('\r                                ', end='')
        return

    track = ['-'] * (N_STATES - 1) + ['D']   # e.g. '-------D' for 8 states
    track[S] = 'M' if N_STATES - S == 2 else 'B'
    print('\r{}'.format(''.join(track)), end='')
    time.sleep(FRESH_TIME)

In [72]:
# The rl() method calls the Q Learning scenario
def rl():
    """Run the Q-learning loop for ``MAX_EPISODES`` episodes.

    Each episode starts from state 0 with a fresh copy of
    ``complete_permissions``. On every step three actions are sampled and the
    corresponding permissions are set to 1, the defense model is queried, and
    the Q-table entry for the current state is updated.

    Returns
    -------
    tuple
        ``(q_table, a_permissions)``: the learned table and the permission row
        as it stood at the end of the last episode.

    NOTE(review): only the last of the three sampled actions is used in the
    Q-update — confirm this is intended.
    """
    q_table = build_q_table(N_STATES, ACTIONS)
    for episode in range(MAX_EPISODES):
        a_permissions = complete_permissions.copy()
        S = 0
        step_counter = 0
        update_env(S, episode, step_counter)
        while True:
            # Enable three permissions before asking the defense model.
            for _ in range(3):
                A = choose_action(S, q_table)
                a_permissions.loc[0, A] = 1
            S_, R = get_env_feedback(S, a_permissions)  # next state and reward

            current_estimate = q_table.loc[S, A]
            finished = S_ == 'terminal'
            if finished:
                td_target = R  # no future value after the terminal state
            else:
                td_target = R + GAMMA * q_table.iloc[S_, :].max()

            q_table.loc[S, A] += ALPHA * (td_target - current_estimate)  # update
            S = S_  # move to next state

            step_counter += 1
            update_env(S, episode, step_counter)
            if finished:
                break
    return q_table, a_permissions

In [84]:
# let's start it 
# Train the agent and keep both the learned Q-table and the permission row
# left over from the last episode, then print the table.
q_table, a_permissions = rl()
print('\r\nQ-table:\n')
print(q_table)

                                
Q-table:

       android.permission.INTERNET  android.permission.READ_PHONE_STATE  \
0                         0.000000                              0.00000   
1                         1.151759                              0.00000   
2                         0.000000                              0.11791   
3                         0.000000                              0.00000   
4                         0.000000                              0.00000   
5                         0.000000                              0.00000   
6                         0.000000                              0.00000   
7                         0.000000                              0.00000   
8                         0.000000                              0.00000   
9                         0.000000                              0.00000   
10                        0.000000                              0.00000   
11                        0.000000                       

In [74]:
# Inspect the permission vector returned by rl() (row 0 of the frame).
a_permissions.iloc[0,]

android.app.cts.permission.TEST_GRANTED                          0.0
android.intent.category.MASTER_CLEAR.permission.C2D_MESSAGE      0.0
android.os.cts.permission.TEST_GRANTED                           0.0
android.permission.ACCESS_ALL_DOWNLOADS                          0.0
android.permission.ACCESS_ALL_EXTERNAL_STORAGE                   0.0
android.permission.ACCESS_BLUETOOTH_SHARE                        0.0
android.permission.ACCESS_CACHE_FILESYSTEM                       0.0
android.permission.ACCESS_CHECKIN_PROPERTIES                     0.0
android.permission.ACCESS_COARSE_LOCATION                        0.0
android.permission.ACCESS_CONTENT_PROVIDERS_EXTERNALLY           0.0
android.permission.ACCESS_DOWNLOAD_MANAGER                       0.0
android.permission.ACCESS_DOWNLOAD_MANAGER_ADVANCED              0.0
android.permission.ACCESS_DRM_CERTIFICATES                       0.0
android.permission.ACCESS_FINE_LOCATION                          0.0
android.permission.ACCESS_FM_RADIO

In [85]:
# Final check: classify the permission vector produced by training.
# NOTE(review): a prediction of 1 is reported as malware here, whereas
# get_env_feedback comments the same value as "detected as benign" — confirm
# the classifier's label encoding and make the two consistent.
if clf.predict(a_permissions) == 1 :
    print("Malware developed")
else:
    print("Benign app")

Malware developed
