In [8]:
import numpy as np
import filecmp
from pulp import *
import time

In [9]:
class LP:
    """Policy iteration for a finite MDP, using a linear program to evaluate each policy.

    Attributes set by ``__init__``:
        n_s:   number of states (second token of the first line of the file).
        n_a:   number of actions (second token of the second line).
        gamma: discount factor (second token of the last non-blank line).
        MDP:   ``MDP[s][a]`` is a list of ``(next_state, reward, prob)`` tuples,
               in the order the transitions appear in the file.

    Attributes set by ``linear_programming``:
        pi:     integer array of length ``n_s`` with the chosen action per state.
        values: array of length ``n_s`` holding the value of ``(s, pi[s])``
                formatted as a string with six decimals.
    """

    def __init__(self, filename):
        """Parse ``MDPS/<filename>`` into ``n_s``, ``n_a``, ``gamma`` and ``MDP``.

        Every line between the two header lines and the final discount line is
        read as ``<tag> s a next_state reward prob`` (the first token is
        ignored). Lines whose state or action index is outside
        ``range(n_s)`` / ``range(n_a)`` are skipped.
        """
        with open(f"MDPS/{filename}") as handle:
            # Dropping blank lines makes the parser independent of whether the
            # file ends with a newline (the discount line must stay last).
            lines = [ln for ln in handle.read().split("\n") if ln.strip()]

        self.n_s = int(lines.pop(0).split()[1])
        self.n_a = int(lines.pop(0).split()[1])
        self.gamma = float(lines.pop().split()[1])

        self.MDP = {s: {a: [] for a in range(self.n_a)} for s in range(self.n_s)}

        # Single pass over the transitions instead of rescanning the whole
        # file for every (state, action) pair.
        for ln in lines:
            fields = ln.split()
            state = int(fields[1])
            if not 0 <= state < self.n_s:
                continue
            action = int(fields[2])
            if not 0 <= action < self.n_a:
                continue
            self.MDP[state][action].append(
                (int(fields[3]), float(fields[4]), float(fields[5]))
            )

    def linear_programming(self):
        """Iterate LP-based evaluation and greedy improvement until the policy is stable.

        Each iteration builds an LP with one variable per (state, action) pair,
        constrains it to be at least the expected ``reward + gamma * value`` of
        following the current policy from the next state, and minimises the sum
        of all variables. The new policy takes, per state, the action with the
        largest solved value (first index on ties, after rounding to six
        decimals). Results are stored in ``self.pi`` and ``self.values``.

        Raises:
            RuntimeError: if the solver does not report an optimal solution.
        """
        self.pi = np.zeros(self.n_s, dtype=int)
        state_idx = np.arange(self.n_s)

        while True:
            model = LpProblem("MDP", LpMinimize)

            # One variable per (state, action), flattened as s * n_a + a.
            # NOTE(review): lowBound=0 forbids negative values; presumably the
            # input MDPs never need them -- confirm for negative rewards.
            val = {
                i: LpVariable(name=f"x{i}", lowBound=0, cat=LpContinuous)
                for i in range(self.n_s * self.n_a)
            }

            # Constraints: x[s, a] >= sum_s' prob * (reward + gamma * x[s', pi[s']]).
            for s in range(self.n_s):
                for a in range(self.n_a):
                    acc = 0
                    for next_state, reward, prob in self.MDP[s][a]:
                        acc += prob * (
                            reward
                            + self.gamma * val[next_state * self.n_a + self.pi[next_state]]
                        )
                    model += (
                        (val[s * self.n_a + a] - acc) >= 0,
                        f"val{s * self.n_a + a}_constraint",
                    )

            # Objective: minimise the sum of all variables.
            model += lpSum(val.values())

            status = model.solve()
            if status != LpStatusOptimal:
                raise RuntimeError(
                    "LP policy evaluation did not reach an optimal solution "
                    f"(status: {LpStatus[status]})"
                )

            # Keep the solved values numeric: argmax over formatted strings
            # compares lexicographically ("10.000000" < "9.000000") and can
            # select the wrong action.
            q_values = np.array(
                [round(var.value(), 6) for var in val.values()], dtype=float
            ).reshape([self.n_s, self.n_a])

            pi_new = np.argmax(q_values, axis=1)

            if np.array_equal(self.pi, pi_new):
                break
            self.pi = pi_new

        # Stored as six-decimal strings because output() writes them verbatim.
        self.values = np.array(["%.6f" % v for v in q_values[state_idx, self.pi]])
    #----------------------------------------------------------xxx--------------------------------------------------------------------------#

    def output(self, output_file):
        """Write one ``"<value> <action>"`` line per state to ``LP-sol-<output_file>``."""
        # The context manager guarantees the file is flushed and closed before
        # anything else reads it.
        with open(f"LP-sol-{output_file}", 'w') as f:
            for i in range(self.n_s):
                f.write(f"{self.values[i]} {self.pi[i]}\n")

    def compare_sol(self, file1, file2):
        """Print whether ``file1`` and ``file2`` have identical contents."""
        # shallow=False forces a byte-by-byte comparison; the default only
        # compares os.stat() signatures when they match, which can report two
        # different same-sized files as equal.
        compare = filecmp.cmp(file1, file2, shallow=False)
        if compare:
            print("Solution is correct")
        else:
            print("Wrong solution")

In [10]:
# Timed end-to-end run on mdp-2-2.txt: parse the MDP, solve it, write the
# solution to LP-sol-mdp-2-2.txt and compare that file with
# MDPS/sol-mdp-2-2.txt.
start = time.time()

First = LP("mdp-2-2.txt")
First.linear_programming()
First.output("mdp-2-2.txt")
First.compare_sol("MDPS/sol-mdp-2-2.txt", "LP-sol-mdp-2-2.txt")

end = time.time()
elapsed = end - start  # wall-clock seconds for the whole pipeline
print("Time taken is", elapsed, "s")

Solution is correct
Time taken is 0.03276538848876953 s


In [11]:
# Timed end-to-end run on mdp-10-5.txt: parse the MDP, solve it, write the
# solution to LP-sol-mdp-10-5.txt and compare that file with
# MDPS/sol-mdp-10-5.txt.
start = time.time()

Second = LP("mdp-10-5.txt")
Second.linear_programming()
Second.output("mdp-10-5.txt")
Second.compare_sol("MDPS/sol-mdp-10-5.txt", "LP-sol-mdp-10-5.txt")

end = time.time()
elapsed = end - start  # wall-clock seconds for the whole pipeline
print("Time taken is", elapsed, "s")

Solution is correct
Time taken is 0.09855031967163086 s
