In [1]:
import gym
import math
import random
import numpy as np
import pandas as pd

from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import time

import collections
import copy

#env = gym.make('CartPole-v0').unwrapped

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# if gpu is to be used
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [2]:
data_path = '/home/ubuntu/data/dataset/R3009_U5_V100/'
UIT = pd.read_csv(data_path + 'UIT.csv')
UIT

Unnamed: 0,u,i,day,time,viewtime,video_type,video_format,city,city_isp,client_ip,conn_type,device_type
0,365,3391,0,0,148,1030,101001,0,0,11807,1,2
1,203,5779,0,0,7,1030,10203,0,0,15068,1,2
2,208,4675,0,0,92,1035,10203,0,0,5375,1,2
3,159,332,0,0,56,1030,10202,0,0,5992,1,2
4,50,674,0,0,439,1030,10203,0,0,3468,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...
300978,483,6831,29,2591880,34,1030,10203,0,0,10010,1,2
300979,158,8448,29,2591880,34,1030,10203,0,0,23340,1,2
300980,483,6463,29,2591940,35,1030,10203,0,0,10010,1,2
300981,158,4715,29,2591940,34,1030,10203,0,0,23340,1,2


In [3]:
trainUIT = UIT[UIT['day']<18]
contentNum = len(UIT.i.drop_duplicates())
userNum = len(UIT.u.drop_duplicates())
contentNum,userNum,trainUIT

(10000,
 500,
           u     i  day     time  viewtime  video_type  video_format  city  \
 0       365  3391    0        0       148        1030        101001     0   
 1       203  5779    0        0         7        1030         10203     0   
 2       208  4675    0        0        92        1035         10203     0   
 3       159   332    0        0        56        1030         10202     0   
 4        50   674    0        0       439        1030         10203     0   
 ...     ...   ...  ...      ...       ...         ...           ...   ...   
 198170  264  7442   17  1555140        90        1035         10203     0   
 198171   19  9362   17  1555140       424        1035         10203     0   
 198172   82  9223   17  1555140        94        1037         10203     0   
 198173   35  4164   17  1555140        22        1030         10203     0   
 198174  239  5062   17  1555140        89        1035         10203     0   
 
         city_isp  client_ip  conn_type  device_

In [4]:
class ENV(object):
    def __init__(self,userNum,contentNum):
        self.userNum = userNum
        self.contentNum =contentNum

        self.r = np.zeros(shape=(userNum,contentNum),dtype=int)
        self.p = np.full(shape=contentNum,fill_value = 1/userNum)
        self.e = np.zeros(shape=contentNum)
        self.S = np.ones(shape=contentNum,dtype=int)
        self.l_edge = 0.1
        self.l_cp = 1

        self.B = np.full(shape=userNum,fill_value=5,dtype=int)

        self.pipe = collections.OrderedDict()


    #有序字典实现LRU
    def updateEgdeCache(self,action,t):
        for i in np.argwhere(action==1).squeeze(-1):
            if i in self.pipe.keys():
                self.pipe.pop(i)
            elif len(self.pipe) >= 500:
                self.e[self.pipe.popitem(last=False)[0]] = 0
            self.pipe[i] = t
            self.e[i] = 1

    
    def updateEnv(self,u,action,t):
        
        p_tmp = ((self.r[u] | action)-self.r[u])*(1/self.userNum) + self.p
        self.p = np.where(p_tmp<1-1/self.userNum,p_tmp,1-1/self.userNum)

        self.r[u] = self.r[u] | action

        self.updateEgdeCache(action,t)

    def getStatus(self):
        return (torch.from_numpy(self.r),
                torch.from_numpy(self.p) , 
                torch.from_numpy(self.e),
                torch.from_numpy(self.S),
                self.l_edge,
                self.l_cp)

    #def reset(self):
    #    self.r = np.zeros(shape=(self.userNum,self.contentNum),dtype=int)
    #    self.p = np.full(shape=self.contentNum,fill_value = 1/self.userNum)
    #    self.e = np.zeros(shape=self.contentNum)
    #    self.S = np.ones(shape=self.contentNum,dtype=int)
    #    self.l_edge = 0.1
    #    self.l_cp = 1
    #    self.B = np.full(shape=self.userNum,fill_value=15,dtype=int)
    #    self.pipe = collections.OrderedDict()


In [5]:
class UE_random(object):
    def __init__(self,u,env,rewardPara):
        self.u = u

        self.W = []
        self.v = torch.zeros(size=(env.contentNum,),dtype=int)

        self.Bu = int(env.B[self.u])
        self.contentNum = env.contentNum
        self.userNum = env.userNum

        self.r , self.p , self.e, self.S,self.l_edge, self.l_cp = env.getStatus()

        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.lastAction = self.action

        self.reward = 0
        self.ALPHAh = rewardPara['alpha']
        self.BETAo =  rewardPara['betao']
        self.BETAl =  rewardPara['betal']

    def updateViewContent(self,i):
        self.W.append(i)
        self.v[i] = 1

    
    def getReward(self,lastru,lastp,ru,p,i,action,S,Bu,l_edge,l_cp,e):

        self.Rh = - self.ALPHAh * (torch.log(ru * p + (1-ru) * (1-p)) - torch.log(lastru * lastp + (1-lastru) * (1-lastp))).sum()

        self.Ro =   self.BETAo * action[i] * (S[i] / Bu + ( e[i] * l_edge + ( 1-e[i] ) * l_cp ) / S[i])

        self.Rl =   self.BETAl * ( ( 1 - action[i] )  * ( l_cp - ( e[i] * l_edge + ( 1 - e[i] ) * l_cp ) ) ) / S[i]

        #self.Rh[i] = self.Rh[i] + self.Ro + self.Rl

        return  self.Rh+self.Ro+self.Rl

    def selectAction(self,env,uit):


        self.lastAction = self.action
        self.lastp = self.p
        self.lastr = self.r

        self.updateViewContent(uit[1])
        self.r , self.p , self.e, self.S, self.l_edge, self.l_cp = env.getStatus()
        
        self.reward = self.getReward(self.lastr[self.u],self.lastp,self.r[self.u],self.p,self.W[-1],self.lastAction,self.S,self.Bu,self.l_edge,self.l_cp,self.e)
        
        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.action[self.W[-1]] = 1

        actionIndex = list(torch.randint(0,self.contentNum,(self.Bu,)))
        
        if self.W[-1] not in actionIndex:
            actionIndex.pop()
        for index in actionIndex:
            self.action[index] = 1

        env.updateEnv(self.u,self.action.numpy(),uit[2])

        return self.action

In [6]:
num_episodes = 1
TARGET_UPDATE = 1
bestReward =  float("-inf")

env = ENV(userNum,contentNum)
UEs = {}
sumReward = np.zeros(3)
loss = 0
UEHit = np.zeros(userNum)
edgeHit = 0

rewardPara = {"alpha":1,"betao":0.5,"betal":0.5}

sumHitrate = np.zeros(UIT.shape[0]// 10000 +2)
UEHitrate = np.zeros(UIT.shape[0]// 10000 +2)
edgeHitrate = np.zeros(UIT.shape[0]// 10000 +2)
privacyReduction = np.zeros(UIT.shape[0]// 10000 +2)


for i_episode in range(num_episodes):
    # Initialize the environment and state

    for index,trace in UIT.iterrows():
        uit = trace.to_numpy()
        if uit[0] not in UEs:
            UEs[uit[0]] = UE_random(uit[0],env,rewardPara)

        ue = UEs[uit[0]]
        
        actionIndex = np.argwhere(ue.lastAction)
        if uit[1] in actionIndex:
            UEHit[uit[0]] += 1
        elif uit[1] in env.pipe.keys():
            edgeHit += 1

        ue.selectAction(env,uit)

        sumReward[0] += float(ue.Rh)
        sumReward[1] += float(ue.Rl)
        sumReward[2] += float(ue.Ro)

        if index % 10000 == 0 :
            psi = 0
            p = torch.from_numpy(env.p)
            for u in UEs:
                psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
            print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
            print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
            print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
            print()
            sumHitrate[int(index // 10000)]   = round((edgeHit+UEHit.sum())/(index+1),5)
            UEHitrate [int(index // 10000)]   = round(UEHit.sum()/(index+1),5)
            edgeHitrate [int(index // 10000)] = round(edgeHit/(index+1),5)
            privacyReduction [int(index // 10000)] = round(float(psi)/len(UEs),5)
    psi = 0
    p = torch.from_numpy(env.p)
    for u in UEs:
        psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
    print()
    print("----------------------------------------------------------------")
    print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
    print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
    print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
    print("----------------------------------------------------------------")
    print()

    sumHitrate [int(round(index / 10000,0))]  = round((edgeHit+UEHit.sum())/(index+1),5)
    UEHitrate  [int(round(index / 10000,0))]  = round(UEHit.sum()/(index+1),5)
    edgeHitrate[int(round(index / 10000,0))]  = round(edgeHit/(index+1),5)
    privacyReduction [int(round(index / 10000,0))] = round(float(psi)/len(UEs),5)

--Time: Sun Sep 19 13:55:23 2021 Episode: 0   Index: 0   Loss: 0.0 --
Reward: [0. 0. 0.] total reward: 0.0
UEHitrate: 0.0  edgeHitrate 0.0 sumHitrate 0.0  privacy: 1.86387

--Time: Sun Sep 19 13:55:36 2021 Episode: 0   Index: 10000   Loss: 0.0 --
Reward: [-1.7722e+00  5.9890e-02  1.5600e-03] total reward: -1.71075
UEHitrate: 0.0014  edgeHitrate 0.13359 sumHitrate 0.13499  privacy: 2.77671

--Time: Sun Sep 19 13:55:48 2021 Episode: 0   Index: 20000   Loss: 0.0 --
Reward: [-1.42213  0.06824  0.00209] total reward: -1.3518
UEHitrate: 0.00165  edgeHitrate 0.15199 sumHitrate 0.15364  privacy: 2.80516

--Time: Sun Sep 19 13:56:00 2021 Episode: 0   Index: 30000   Loss: 0.0 --
Reward: [-1.17208  0.06414  0.00229] total reward: -1.10564
UEHitrate: 0.0021  edgeHitrate 0.143 sumHitrate 0.1451  privacy: 2.75815

--Time: Sun Sep 19 13:56:12 2021 Episode: 0   Index: 40000   Loss: 0.0 --
Reward: [-1.04688  0.06291  0.00236] total reward: -0.98161
UEHitrate: 0.00232  edgeHitrate 0.1401 sumHitrate 0.14

In [7]:
sumHitrate, UEHitrate, edgeHitrate

(array([0.     , 0.13499, 0.15364, 0.1451 , 0.14242, 0.14128, 0.13926,
        0.13547, 0.13485, 0.13401, 0.133  , 0.13326, 0.13197, 0.13097,
        0.1325 , 0.13377, 0.13276, 0.13361, 0.13427, 0.13374, 0.13351,
        0.13461, 0.13395, 0.13357, 0.13316, 0.13357, 0.13345, 0.13254,
        0.13289, 0.13393, 0.13485, 0.     ]),
 array([0.     , 0.0014 , 0.00165, 0.0021 , 0.00232, 0.00268, 0.0026 ,
        0.0026 , 0.00257, 0.00267, 0.00272, 0.00274, 0.00285, 0.00302,
        0.00309, 0.00317, 0.00318, 0.00321, 0.00321, 0.00332, 0.0033 ,
        0.00338, 0.00342, 0.00346, 0.00348, 0.00352, 0.00357, 0.00363,
        0.00365, 0.00366, 0.00368, 0.     ]),
 array([0.     , 0.13359, 0.15199, 0.143  , 0.1401 , 0.1386 , 0.13666,
        0.13287, 0.13227, 0.13134, 0.13028, 0.13053, 0.12912, 0.12795,
        0.12941, 0.1306 , 0.12957, 0.13041, 0.13106, 0.13043, 0.13021,
        0.13124, 0.13054, 0.13011, 0.12968, 0.13005, 0.12988, 0.12891,
        0.12923, 0.13027, 0.13117, 0.     ]))

In [8]:
privacyReduction

array([1.86387, 2.77671, 2.80516, 2.75815, 2.7121 , 2.64912, 2.60369,
       2.56672, 2.52858, 2.48458, 2.43033, 2.38963, 2.36007, 2.32344,
       2.29293, 2.26416, 2.23086, 2.19814, 2.16298, 2.13354, 2.10366,
       2.07048, 2.03986, 2.00891, 1.98107, 1.95427, 1.92775, 1.90121,
       1.87534, 1.85052, 1.82469, 0.     ])

In [8]:
class UE_None(object):
    def __init__(self,u,env,rewardPara):
        self.u = u

        self.W = []
        self.v = torch.zeros(size=(env.contentNum,),dtype=int)

        self.Bu = int(env.B[self.u])
        self.contentNum = env.contentNum
        self.userNum = env.userNum

        self.r , self.p , self.e, self.S,self.l_edge, self.l_cp = env.getStatus()

        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.lastAction = self.action

        self.reward = 0
        self.ALPHAh = rewardPara['alpha']
        self.BETAo =  rewardPara['betao']
        self.BETAl =  rewardPara['betal']

    def updateViewContent(self,i):
        self.W.append(i)
        self.v[i] = 1

    
    def getReward(self,lastru,lastp,ru,p,i,action,S,Bu,l_edge,l_cp,e):

        self.Rh = - self.ALPHAh * (torch.log(lastru * lastp + (1-lastru) * (1-lastp)).sum() - torch.log(ru * p + (1-ru) * (1-p)).sum())

        self.Ro =   self.BETAo * action[i] * (S[i] / Bu + ( e[i] * l_edge + ( 1-e[i] ) * l_cp ) / S[i])

        self.Rl =   self.BETAl * ( ( 1 - action[i] )  * ( l_cp - ( e[i] * l_edge + ( 1 - e[i] ) * l_cp ) ) ) / S[i]

        #self.Rh[i] = self.Rh[i] + self.Ro + self.Rl

        return  self.Rh+self.Ro+self.Rl

    def selectAction(self,env,uit):

        self.lastAction = self.action
        self.lastp = self.p
        self.lastr = self.r

        self.updateViewContent(uit[1])
        self.r , self.p , self.e, self.S, self.l_edge, self.l_cp = env.getStatus()
        
        self.reward = self.getReward(self.lastr[self.u],self.lastp,self.r[self.u],self.p,self.W[-1],self.lastAction,self.S,self.Bu,self.l_edge,self.l_cp,self.e)
        
        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.action[self.W[-1]] = 1

        env.updateEnv(self.u,self.action.numpy(),uit[2])

        return self.action

In [16]:
num_episodes = 1
TARGET_UPDATE = 1
bestReward =  float("-inf")

env = ENV(userNum,contentNum)
UEs = {}
sumReward = np.zeros(3)
loss = 0
UEHit = np.zeros(userNum)
edgeHit = 0

rewardPara = {"alpha":1,"betao":0.5,"betal":0.5}


for i_episode in range(num_episodes):
    # Initialize the environment and state

    for index,trace in trainUIT.iterrows():
        uit = trace.to_numpy()
        if uit[0] not in UEs:
            UEs[uit[0]] = UE_None(uit[0],env,rewardPara)

        ue = UEs[uit[0]]
        
        actionIndex = np.argwhere(ue.lastAction)
        if uit[1] in actionIndex:
            UEHit[uit[0]] += 1
        elif uit[1] in env.pipe.keys():
            edgeHit += 1

        ue.selectAction(env,uit)

        sumReward[0] += float(ue.Rh)
        sumReward[1] += float(ue.Rl)
        sumReward[2] += float(ue.Ro)

        if index % 10000 == 0 :
            psi = 0
            p = torch.from_numpy(env.p)
            for u in UEs:
                psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
            print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
            print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
            print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
            print()
            sumHitrate[int(index // 10000)]   = round((edgeHit+UEHit.sum())/(index+1),5)
            UEHitrate [int(index // 10000)]   = round(UEHit.sum()/(index+1),5)
            edgeHitrate [int(index // 10000)] = round(edgeHit/(index+1),5)

    psi = 0
    p = torch.from_numpy(env.p)
    for u in UEs:
        psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
    print()
    print("----------------------------------------------------------------")
    print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
    print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
    print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
    print("----------------------------------------------------------------")
    print()

    

    

--Time: Wed Sep 15 22:04:35 2021 Episode: 0   Index: 0   Loss: 0.0 --
Reward: [0. 0. 0.] total reward: 0.0
UEHitrate: 0.0  edgeHitrate 0.0 sumHitrate 0.0  privacy: 1.0

--Time: Wed Sep 15 22:04:45 2021 Episode: 0   Index: 10000   Loss: 0.0 --
Reward: [0.10938 0.1794  0.00074] total reward: 0.28952
UEHitrate: 0.0018  edgeHitrate 0.39936 sumHitrate 0.40116  privacy: 1.0

--Time: Wed Sep 15 22:04:55 2021 Episode: 0   Index: 20000   Loss: 0.0 --
Reward: [0.03867 0.18685 0.00087] total reward: 0.22639
UEHitrate: 0.00195  edgeHitrate 0.41593 sumHitrate 0.41788  privacy: 1.0

--Time: Wed Sep 15 22:05:05 2021 Episode: 0   Index: 30000   Loss: 0.0 --
Reward: [0.01672 0.19601 0.00083] total reward: 0.21356
UEHitrate: 0.0024  edgeHitrate 0.43615 sumHitrate 0.43855  privacy: 1.0

--Time: Wed Sep 15 22:05:15 2021 Episode: 0   Index: 40000   Loss: 0.0 --
Reward: [0.01123 0.19757 0.00101] total reward: 0.20981
UEHitrate: 0.00247  edgeHitrate 0.43974 sumHitrate 0.44221  privacy: 1.0

--Time: Wed Sep 1

In [25]:
num_episodes = 1
TARGET_UPDATE = 1
bestReward =  float("-inf")

env = ENV(userNum,contentNum)
UEs = {}
sumReward = np.zeros(3)
loss = 0
UEHit = np.zeros(userNum)
edgeHit = 0

rewardPara = {"alpha":1,"betao":0.5,"betal":0.5}

sumHitrate = np.zeros(10)
UEHitrate = np.zeros(10)
edgeHitrate = np.zeros(10)
privacyReduction = np.zeros(10)


for i_episode in range(num_episodes):
    # Initialize the environment and state

    for index,trace in UIT.iterrows():
        uit = trace.to_numpy()
        if uit[0] not in UEs:
            UEs[uit[0]] = UE_None(uit[0],env,rewardPara)

        ue = UEs[uit[0]]
        
        actionIndex = np.argwhere(ue.lastAction)
        if uit[1] in actionIndex:
            UEHit[uit[0]] += 1
        elif uit[1] in env.pipe.keys():
            edgeHit += 1

        ue.selectAction(env,uit)

        sumReward[0] += float(ue.Rh)
        sumReward[1] += float(ue.Rl)
        sumReward[2] += float(ue.Ro)

        if index % 10000 == 0 :
            psi = 0
            p = torch.from_numpy(env.p)
            for u in UEs:
                psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
            print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
            print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
            print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
            print()
            sumHitrate[int(index // 10000)]   = round((edgeHit+UEHit.sum())/(index+1),5)
            UEHitrate [int(index // 10000)]   = round(UEHit.sum()/(index+1),5)
            edgeHitrate [int(index // 10000)] = round(edgeHit/(index+1),5)
            privacyReduction [int(index // 10000)] = round(float(psi)/len(UEs),5)
    psi = 0
    p = torch.from_numpy(env.p)
    for u in UEs:
        psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
    print()
    print("----------------------------------------------------------------")
    print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
    print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
    print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
    print("----------------------------------------------------------------")
    print()

    sumHitrate [int(round(index / 10000,0))]  = round((edgeHit+UEHit.sum())/(index+1),5)
    UEHitrate  [int(round(index / 10000,0))]  = round(UEHit.sum()/(index+1),5)
    edgeHitrate[int(round(index / 10000,0))]  = round(edgeHit/(index+1),5)
    privacyReduction [int(round(index / 10000,0))] = round(float(psi)/len(UEs),5)

--Time: Wed Sep 15 22:59:36 2021 Episode: 0   Index: 0   Loss: 0.0 --
Reward: [0. 0. 0.] total reward: 0.0
UEHitrate: 0.0  edgeHitrate 0.0 sumHitrate 0.0  privacy: 1.0

--Time: Wed Sep 15 22:59:47 2021 Episode: 0   Index: 10000   Loss: 0.0 --
Reward: [0.10938 0.1794  0.00074] total reward: 0.28952
UEHitrate: 0.0018  edgeHitrate 0.39936 sumHitrate 0.40116  privacy: 1.0

--Time: Wed Sep 15 22:59:58 2021 Episode: 0   Index: 20000   Loss: 0.0 --
Reward: [0.03867 0.18685 0.00087] total reward: 0.22639
UEHitrate: 0.00195  edgeHitrate 0.41593 sumHitrate 0.41788  privacy: 1.0

--Time: Wed Sep 15 23:00:09 2021 Episode: 0   Index: 30000   Loss: 0.0 --
Reward: [0.01672 0.19601 0.00083] total reward: 0.21356
UEHitrate: 0.0024  edgeHitrate 0.43615 sumHitrate 0.43855  privacy: 1.0

--Time: Wed Sep 15 23:00:20 2021 Episode: 0   Index: 40000   Loss: 0.0 --
Reward: [0.01123 0.19757 0.00101] total reward: 0.20981
UEHitrate: 0.00247  edgeHitrate 0.43974 sumHitrate 0.44221  privacy: 1.0

--Time: Wed Sep 1

In [26]:
sumHitrate, UEHitrate, edgeHitrate

(array([0.     , 0.40116, 0.41788, 0.43855, 0.44221, 0.44403, 0.44093,
        0.44305, 0.44431, 0.45009]),
 array([0.     , 0.0018 , 0.00195, 0.0024 , 0.00247, 0.00246, 0.00258,
        0.0028 , 0.00297, 0.00298]),
 array([0.     , 0.39936, 0.41593, 0.43615, 0.43974, 0.44157, 0.43834,
        0.44025, 0.44133, 0.44711]))

In [27]:
privacyReduction

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])