In [1]:
import gym
import math
import random
import numpy as np
import pandas as pd

from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import time

import collections
import copy

#env = gym.make('CartPole-v0').unwrapped

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# if gpu is to be used
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=1)

In [3]:
data_path = '/home/ubuntu/data/dataset/R3009_U5_V100/'
UIT = pd.read_csv(data_path + 'UIT.csv')
UIT

Unnamed: 0,u,i,day,time,viewtime,video_type,video_format,city,city_isp,client_ip,conn_type,device_type
0,365,3391,0,0,148,1030,101001,0,0,11807,1,2
1,203,5779,0,0,7,1030,10203,0,0,15068,1,2
2,208,4675,0,0,92,1035,10203,0,0,5375,1,2
3,159,332,0,0,56,1030,10202,0,0,5992,1,2
4,50,674,0,0,439,1030,10203,0,0,3468,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...
300978,483,6831,29,2591880,34,1030,10203,0,0,10010,1,2
300979,158,8448,29,2591880,34,1030,10203,0,0,23340,1,2
300980,483,6463,29,2591940,35,1030,10203,0,0,10010,1,2
300981,158,4715,29,2591940,34,1030,10203,0,0,23340,1,2


In [4]:
trainUIT = UIT[UIT['day']<18]
contentNum = len(UIT.i.drop_duplicates())
userNum = len(UIT.u.drop_duplicates())
contentNum,userNum,trainUIT

(10000,
 500,
           u     i  day     time  viewtime  video_type  video_format  city  \
 0       365  3391    0        0       148        1030        101001     0   
 1       203  5779    0        0         7        1030         10203     0   
 2       208  4675    0        0        92        1035         10203     0   
 3       159   332    0        0        56        1030         10202     0   
 4        50   674    0        0       439        1030         10203     0   
 ...     ...   ...  ...      ...       ...         ...           ...   ...   
 198170  264  7442   17  1555140        90        1035         10203     0   
 198171   19  9362   17  1555140       424        1035         10203     0   
 198172   82  9223   17  1555140        94        1037         10203     0   
 198173   35  4164   17  1555140        22        1030         10203     0   
 198174  239  5062   17  1555140        89        1035         10203     0   
 
         city_isp  client_ip  conn_type  device_

In [5]:
class ENV(object):
    def __init__(self,userNum,contentNum):
        self.userNum = userNum
        self.contentNum =contentNum

        self.r = np.zeros(shape=(userNum,contentNum),dtype=int)
        self.p = np.full(shape=contentNum,fill_value = 1/userNum)
        self.e = np.zeros(shape=contentNum)
        self.S = np.ones(shape=contentNum,dtype=int)
        self.l_edge = 0.1
        self.l_cp = 1

        self.B = np.full(shape=userNum,fill_value=15,dtype=int)

        self.pipe = collections.OrderedDict()


    #有序字典实现LRU
    def updateEgdeCache(self,action,t):
        for i in np.argwhere(action==1).squeeze(-1):
            if i in self.pipe.keys():
                self.pipe.pop(i)
            elif len(self.pipe) >= 500:
                self.e[self.pipe.popitem(last=False)[0]] = 0
            self.pipe[i] = t
            self.e[i] = 1

    
    def updateEnv(self,u,action,t):
        
        p_tmp = ((self.r[u] | action)-self.r[u])*(1/self.userNum) + self.p
        self.p = np.where(p_tmp<1-1/self.userNum,p_tmp,1-1/self.userNum)

        self.r[u] = self.r[u] | action

        self.updateEgdeCache(action,t)

    def getStatus(self):
        return (torch.from_numpy(self.r),
                torch.from_numpy(self.p) , 
                torch.from_numpy(self.e),
                torch.from_numpy(self.S),
                self.l_edge,
                self.l_cp)

    #def reset(self):
    #    self.r = np.zeros(shape=(self.userNum,self.contentNum),dtype=int)
    #    self.p = np.full(shape=self.contentNum,fill_value = 1/self.userNum)
    #    self.e = np.zeros(shape=self.contentNum)
    #    self.S = np.ones(shape=self.contentNum,dtype=int)
    #    self.l_edge = 0.1
    #    self.l_cp = 1
    #    self.B = np.full(shape=self.userNum,fill_value=15,dtype=int)
    #    self.pipe = collections.OrderedDict()


In [6]:
class UE_random(object):
    def __init__(self,u,env,rewardPara):
        self.u = u

        self.W = []
        self.v = torch.zeros(size=(env.contentNum,),dtype=int)

        self.Bu = int(env.B[self.u])
        self.contentNum = env.contentNum
        self.userNum = env.userNum

        self.r , self.p , self.e, self.S,self.l_edge, self.l_cp = env.getStatus()

        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.lastAction = self.action

        self.reward = 0
        self.ALPHAh = rewardPara['alpha']
        self.BETAo =  rewardPara['betao']
        self.BETAl =  rewardPara['betal']

    def updateViewContent(self,i):
        self.W.append(i)
        self.v[i] = 1

    
    def getReward(self,lastru,lastp,ru,p,i,action,S,Bu,l_edge,l_cp,e):

        self.Rh = - self.ALPHAh * (torch.log(ru * p + (1-ru) * (1-p)) - torch.log(lastru * lastp + (1-lastru) * (1-lastp))).sum()

        self.Ro =   self.BETAo * action[i] * (S[i] / Bu + ( e[i] * l_edge + ( 1-e[i] ) * l_cp ) / S[i])

        self.Rl =   self.BETAl * ( ( 1 - action[i] )  * ( l_cp - ( e[i] * l_edge + ( 1 - e[i] ) * l_cp ) ) ) / S[i]

        #self.Rh[i] = self.Rh[i] + self.Ro + self.Rl

        return  self.Rh+self.Ro+self.Rl

    def selectAction(self,env,uit):


        self.lastAction = self.action
        self.lastp = self.p
        self.lastr = self.r

        self.updateViewContent(uit[1])
        self.r , self.p , self.e, self.S, self.l_edge, self.l_cp = env.getStatus()
        
        self.reward = self.getReward(self.lastr[self.u],self.lastp,self.r[self.u],self.p,self.W[-1],self.lastAction,self.S,self.Bu,self.l_edge,self.l_cp,self.e)
        
        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.action[self.W[-1]] = 1

        actionIndex = list(torch.randint(0,self.contentNum,(self.Bu,)))
        
        if self.W[-1] not in actionIndex:
            actionIndex.pop()
        for index in actionIndex:
            self.action[index] = 1

        env.updateEnv(self.u,self.action.numpy(),uit[2])

        return self.action

In [7]:
num_episodes = 1
TARGET_UPDATE = 1
bestReward =  float("-inf")

env = ENV(userNum,contentNum)
UEs = {}
sumReward = np.zeros(3)
loss = 0
UEHit = np.zeros(userNum)
edgeHit = 0

rewardPara = {"alpha":1,"betao":1,"betal":1}

sumHitrate = np.zeros(UIT.shape[0]// 10000 +2)
UEHitrate = np.zeros(UIT.shape[0]// 10000 +2)
edgeHitrate = np.zeros(UIT.shape[0]// 10000 +2)
privacyReduction = np.zeros(UIT.shape[0]// 10000 +2)


for i_episode in range(num_episodes):
    # Initialize the environment and state

    for index,trace in UIT.iterrows():
        uit = trace.to_numpy()
        if uit[0] not in UEs:
            UEs[uit[0]] = UE_random(uit[0],env,rewardPara)

        ue = UEs[uit[0]]
        
        actionIndex = np.argwhere(ue.lastAction.numpy())
        if uit[1] in actionIndex:
            UEHit[uit[0]] += 1
        elif uit[1] in env.pipe.keys():
            edgeHit += 1

        ue.selectAction(env,uit)

        sumReward[0] += float(ue.Rh)
        sumReward[1] += float(ue.Rl)
        sumReward[2] += float(ue.Ro)

        if index % 10000 == 0 :
            psi = 0
            p = torch.from_numpy(env.p)
            for u in UEs:
                psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
            print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
            print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
            print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
            print()
            sumHitrate[int(index // 10000)]   = round((edgeHit+UEHit.sum())/(index+1),5)
            UEHitrate [int(index // 10000)]   = round(UEHit.sum()/(index+1),5)
            edgeHitrate [int(index // 10000)] = round(edgeHit/(index+1),5)
            privacyReduction [int(index // 10000)] = round(float(psi)/len(UEs),5)
    psi = 0
    p = torch.from_numpy(env.p)
    for u in UEs:
        psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
    print()
    print("----------------------------------------------------------------")
    print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
    print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
    print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
    print("----------------------------------------------------------------")
    print()

    sumHitrate [int(round(index / 10000,0))]  = round((edgeHit+UEHit.sum())/(index+1),5)
    UEHitrate  [int(round(index / 10000,0))]  = round(UEHit.sum()/(index+1),5)
    edgeHitrate[int(round(index / 10000,0))]  = round(edgeHit/(index+1),5)
    privacyReduction [int(round(index / 10000,0))] = round(float(psi)/len(UEs),5)

--Time: Wed Sep 22 14:02:37 2021 Episode: 0   Index: 0   Loss: 0.0 --
Reward: [0. 0. 0.] total reward: 0.0
UEHitrate: 0.0  edgeHitrate 0.0 sumHitrate 0.0  privacy: 4.02118

--Time: Wed Sep 22 14:02:50 2021 Episode: 0   Index: 10000   Loss: 0.0 --
Reward: [-5.35585e+00  7.82900e-02  3.22000e-03] total reward: -5.27434
UEHitrate: 0.003  edgeHitrate 0.08759 sumHitrate 0.09059  privacy: 3.87249

--Time: Wed Sep 22 14:03:02 2021 Episode: 0   Index: 20000   Loss: 0.0 --
Reward: [-4.57106e+00  8.10000e-02  4.40000e-03] total reward: -4.48567
UEHitrate: 0.003  edgeHitrate 0.0904 sumHitrate 0.0934  privacy: 3.39743

--Time: Wed Sep 22 14:03:15 2021 Episode: 0   Index: 30000   Loss: 0.0 --
Reward: [-3.90029  0.0753   0.0048 ] total reward: -3.82019
UEHitrate: 0.00327  edgeHitrate 0.08396 sumHitrate 0.08723  privacy: 3.05268

--Time: Wed Sep 22 14:03:27 2021 Episode: 0   Index: 40000   Loss: 0.0 --
Reward: [-3.51531  0.07603  0.00478] total reward: -3.4345
UEHitrate: 0.00352  edgeHitrate 0.08472 

In [8]:
sumHitrate, UEHitrate, edgeHitrate

(array([0.     , 0.09059, 0.0934 , 0.08723, 0.08825, 0.08818, 0.08638,
        0.08448, 0.08411, 0.08362, 0.08257, 0.08212, 0.08162, 0.08092,
        0.08119, 0.08095, 0.08079, 0.08082, 0.08106, 0.08078, 0.08094,
        0.08124, 0.08088, 0.08085, 0.0807 , 0.08114, 0.08093, 0.08072,
        0.08097, 0.08117, 0.08118, 0.     ]),
 array([0.     , 0.003  , 0.003  , 0.00327, 0.00352, 0.00378, 0.00378,
        0.0037 , 0.00371, 0.00387, 0.00383, 0.00393, 0.00398, 0.00419,
        0.00428, 0.00436, 0.00438, 0.00438, 0.00438, 0.00446, 0.00444,
        0.0045 , 0.0045 , 0.00455, 0.00458, 0.00466, 0.0047 , 0.00477,
        0.00481, 0.00481, 0.00479, 0.     ]),
 array([0.     , 0.08759, 0.0904 , 0.08396, 0.08472, 0.0844 , 0.0826 ,
        0.08078, 0.0804 , 0.07975, 0.07874, 0.07819, 0.07764, 0.07673,
        0.07691, 0.07659, 0.07641, 0.07643, 0.07668, 0.07633, 0.0765 ,
        0.07675, 0.07639, 0.0763 , 0.07611, 0.07648, 0.07623, 0.07596,
        0.07615, 0.07636, 0.07638, 0.     ]))

In [9]:
privacyReduction

array([4.02118, 3.87249, 3.39743, 3.05268, 2.80303, 2.58978, 2.42809,
       2.29468, 2.16831, 2.05126, 1.93582, 1.83487, 1.7502 , 1.66939,
       1.59503, 1.52718, 1.46064, 1.40018, 1.33967, 1.28351, 1.23272,
       1.18247, 1.13662, 1.09053, 1.04733, 1.00647, 0.96889, 0.93318,
       0.8988 , 0.86455, 0.83082, 0.     ])

In [10]:
class UE_None(object):
    def __init__(self,u,env,rewardPara):
        self.u = u

        self.W = []
        self.v = torch.zeros(size=(env.contentNum,),dtype=int)

        self.Bu = int(env.B[self.u])
        self.contentNum = env.contentNum
        self.userNum = env.userNum

        self.r , self.p , self.e, self.S,self.l_edge, self.l_cp = env.getStatus()

        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.lastAction = self.action

        self.reward = 0
        self.ALPHAh = rewardPara['alpha']
        self.BETAo =  rewardPara['betao']
        self.BETAl =  rewardPara['betal']

    def updateViewContent(self,i):
        self.W.append(i)
        self.v[i] = 1

    
    def getReward(self,lastru,lastp,ru,p,i,action,S,Bu,l_edge,l_cp,e):

        self.Rh = - self.ALPHAh * (torch.log(lastru * lastp + (1-lastru) * (1-lastp)).sum() - torch.log(ru * p + (1-ru) * (1-p)).sum())

        self.Ro =   self.BETAo * action[i] * (S[i] / Bu + ( e[i] * l_edge + ( 1-e[i] ) * l_cp ) / S[i])

        self.Rl =   self.BETAl * ( ( 1 - action[i] )  * ( l_cp - ( e[i] * l_edge + ( 1 - e[i] ) * l_cp ) ) ) / S[i]

        #self.Rh[i] = self.Rh[i] + self.Ro + self.Rl

        return  self.Rh+self.Ro+self.Rl

    def selectAction(self,env,uit):

        self.lastAction = self.action
        self.lastp = self.p
        self.lastr = self.r

        self.updateViewContent(uit[1])
        self.r , self.p , self.e, self.S, self.l_edge, self.l_cp = env.getStatus()
        
        self.reward = self.getReward(self.lastr[self.u],self.lastp,self.r[self.u],self.p,self.W[-1],self.lastAction,self.S,self.Bu,self.l_edge,self.l_cp,self.e)
        
        self.action = torch.zeros(size=(env.contentNum,),dtype=int)
        self.action[self.W[-1]] = 1

        env.updateEnv(self.u,self.action.numpy(),uit[2])

        return self.action

In [12]:
num_episodes = 1
TARGET_UPDATE = 1
bestReward =  float("-inf")

env = ENV(userNum,contentNum)
UEs = {}
sumReward = np.zeros(3)
loss = 0
UEHit = np.zeros(userNum)
edgeHit = 0

rewardPara = {"alpha":1,"betao":0.5,"betal":0.5}

sumHitrate = np.zeros(UIT.shape[0]// 10000 +2)
UEHitrate = np.zeros(UIT.shape[0]// 10000 +2)
edgeHitrate = np.zeros(UIT.shape[0]// 10000 +2)
privacyReduction = np.zeros(UIT.shape[0]// 10000 +2)


for i_episode in range(num_episodes):
    # Initialize the environment and state

    for index,trace in UIT.iterrows():
        uit = trace.to_numpy()
        if uit[0] not in UEs:
            UEs[uit[0]] = UE_None(uit[0],env,rewardPara)

        ue = UEs[uit[0]]
        
        actionIndex = np.argwhere(ue.lastAction)
        if uit[1] in actionIndex:
            UEHit[uit[0]] += 1
        elif uit[1] in env.pipe.keys():
            edgeHit += 1

        ue.selectAction(env,uit)

        sumReward[0] += float(ue.Rh)
        sumReward[1] += float(ue.Rl)
        sumReward[2] += float(ue.Ro)

        if index % 10000 == 0 :
            psi = 0
            p = torch.from_numpy(env.p)
            for u in UEs:
                psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
            print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
            print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
            print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
            print()
            sumHitrate[int(index // 10000)]   = round((edgeHit+UEHit.sum())/(index+1),5)
            UEHitrate [int(index // 10000)]   = round(UEHit.sum()/(index+1),5)
            edgeHitrate [int(index // 10000)] = round(edgeHit/(index+1),5)
            privacyReduction [int(index // 10000)] = round(float(psi)/len(UEs),5)
    psi = 0
    p = torch.from_numpy(env.p)
    for u in UEs:
        psi += torch.log(UEs[u].r[u] * p + (1-UEs[u].r[u]) * (1-p)).sum() / torch.log(UEs[u].v * p + (1-UEs[u].v) * (1-p)).sum()
    print()
    print("----------------------------------------------------------------")
    print("--Time:",time.asctime( time.localtime(time.time())),"Episode:",i_episode,"  Index:",index,"  Loss:",round(loss/(index+1),5),"--")
    print("Reward:",np.around(sumReward/(index+1),5),"total reward:",round(sumReward.sum()/(index+1),5))
    print("UEHitrate:",round(UEHit.sum()/(index+1),5)," edgeHitrate",round(edgeHit/(index+1),5),"sumHitrate",round((edgeHit+UEHit.sum())/(index+1),5)," privacy:",round(float(psi)/len(UEs),5))
    print("----------------------------------------------------------------")
    print()

    sumHitrate [int(round(index / 10000,0))]  = round((edgeHit+UEHit.sum())/(index+1),5)
    UEHitrate  [int(round(index / 10000,0))]  = round(UEHit.sum()/(index+1),5)
    edgeHitrate[int(round(index / 10000,0))]  = round(edgeHit/(index+1),5)
    privacyReduction [int(round(index / 10000,0))] = round(float(psi)/len(UEs),5)

--Time: Wed Sep 22 14:49:10 2021 Episode: 0   Index: 0   Loss: 0.0 --
Reward: [0. 0. 0.] total reward: 0.0
UEHitrate: 0.0  edgeHitrate 0.0 sumHitrate 0.0  privacy: 1.0

--Time: Wed Sep 22 14:49:21 2021 Episode: 0   Index: 10000   Loss: 0.0 --
Reward: [0.34533 0.1717  0.00095] total reward: 0.51799
UEHitrate: 0.0012  edgeHitrate 0.38216 sumHitrate 0.38336  privacy: 1.0

--Time: Wed Sep 22 14:49:32 2021 Episode: 0   Index: 20000   Loss: 0.0 --
Reward: [0.25903 0.19043 0.00131] total reward: 0.45077
UEHitrate: 0.0014  edgeHitrate 0.42373 sumHitrate 0.42513  privacy: 1.0

--Time: Wed Sep 22 14:49:43 2021 Episode: 0   Index: 30000   Loss: 0.0 --
Reward: [0.20849 0.18301 0.00151] total reward: 0.393
UEHitrate: 0.00187  edgeHitrate 0.40742 sumHitrate 0.40929  privacy: 1.0

--Time: Wed Sep 22 14:49:54 2021 Episode: 0   Index: 40000   Loss: 0.0 --
Reward: [0.17622 0.17735 0.00155] total reward: 0.35512
UEHitrate: 0.0021  edgeHitrate 0.39479 sumHitrate 0.39689  privacy: 1.0

--Time: Wed Sep 22 1

In [13]:
sumHitrate, UEHitrate, edgeHitrate

(array([0.     , 0.38336, 0.42513, 0.40929, 0.39689, 0.39467, 0.39188,
        0.38434, 0.38802, 0.38563, 0.38521, 0.38561, 0.3852 , 0.38125,
        0.38528, 0.3895 , 0.38677, 0.38992, 0.39229, 0.39403, 0.3945 ,
        0.39625, 0.39444, 0.39309, 0.39189, 0.3924 , 0.39045, 0.38846,
        0.38847, 0.39078, 0.39444, 0.     ]),
 array([0.     , 0.0012 , 0.0014 , 0.00187, 0.0021 , 0.00242, 0.00232,
        0.00227, 0.00227, 0.00238, 0.00239, 0.00242, 0.00249, 0.00267,
        0.00273, 0.00281, 0.00282, 0.00285, 0.00284, 0.00293, 0.00292,
        0.00299, 0.00302, 0.00307, 0.00309, 0.00314, 0.00319, 0.00325,
        0.00327, 0.00328, 0.0033 , 0.     ]),
 array([0.     , 0.38216, 0.42373, 0.40742, 0.39479, 0.39225, 0.38956,
        0.38207, 0.38575, 0.38325, 0.38282, 0.3832 , 0.38271, 0.37858,
        0.38255, 0.38669, 0.38395, 0.38706, 0.38945, 0.3911 , 0.39157,
        0.39326, 0.39143, 0.39003, 0.3888 , 0.38926, 0.38726, 0.38521,
        0.3852 , 0.3875 , 0.39115, 0.     ]))

In [14]:
privacyReduction

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.])