In [1]:
import numpy as np
import filecmp
from pulp import *

In [39]:
class LP:
    def __init__(self,filename):
        f=open(f"MDPS/{filename}")
        file=f.read()
        f.close()
        file=file.split("\n")
        del file[-1]
        self.n_s=int(file.pop(0).split()[1])
        self.n_a=int(file.pop(0).split()[1])
        self.gamma=float(file.pop().split()[1])
        self.MDP={}
        for i in range(self.n_s):
            self.MDP[i]={}
            for j in range(self.n_a):
                self.MDP[i][j]=[]
                for line in file:
                    line=line.split()
                    if(int(line[1])==i and int(line[2])==j):
                        self.MDP[i][j].append(tuple([int(line[3]),float(line[4]),float(line[5])]))
    
    def linear_programming(self):

        model=LpProblem("MDP",LpMinimize)

        #variables
        val={i : LpVariable(name=f"x{i}",lowBound=0,cat=LpContinuous) for i in range(self.n_s)}
        
        #constraints
        for s in range(self.n_s):
            acc=0
            for a in range(self.n_a):
                for next_state,reward,prob in self.MDP[s][a]:
                    acc+=prob*(reward+self.gamma*val[next_state])
                    acc=(-1)*acc
            model+=(val[s]+acc>=0,f"val{s}_contraint")

        #objective
        model+=lpSum(val.values())

        # Solve the optimization problem
        status = model.solve()

        # Get the results
        print(f"status: {model.status}, {LpStatus[model.status]}")
        print(f"objective: {model.objective.value()}")

        self.values=[]
        for var in val.values():
            self.values.append(float(var.value()))

        q=np.zeros([self.n_s,self.n_a])
        for s in range(self.n_s):
            for a in range(self.n_a):
                for next_state,reward,prob in self.MDP[s][a]:
                       q[s][a]+=prob*(reward+self.gamma*self.values[next_state])
        self.pi=np.argmax(q,axis=1)

    def output(self,output_file):
        f=open(f"LP-sol-{output_file}",'w')
        for i in range(self.n_s):
            # val="%.6f" % round(float(self.values[i]), 6)
            f.write(f"{self.values[i]} {self.pi[i]}\n")
            #f.write(f"{self.values[i]}\n")
        f.close

    def compare_sol(self,file1,file2):
        compare=filecmp.cmp(file1,file2)
        if(compare):
            print("Solution is correct")
        else:
            print("Wrong solution")

In [40]:
First=LP("mdp-2-2.txt")
First.linear_programming()
First.output("mdp-2-2.txt")
First.compare_sol("MDPS/sol-mdp-2-2.txt","LP-sol-mdp-2-2.txt")


status: 1, Optimal
objective: 0.73244355
Wrong solution


In [None]:
Second=LP("mdp-10-5.txt")
Second.linear_programming()
Second.output("mdp-10-5.txt")
Second.compare_sol("MDPS/sol-mdp-10-5.txt","LP-sol-mdp-10-5.txt")

In [4]:
f=open("Week1/MDPS/mdp-2-2.txt")
file=f.read()
file=file.split("\n")
del file[-1]
n_s=int(file.pop(0).split()[1])
n_a=int(file.pop(0).split()[1])
gamma=float(file.pop().split()[1])
MDP={}
for i in range(n_s):
    MDP[i]={}
    for j in range(n_a):
        MDP[i][j]=[]
        for line in file:
            line=line.split()
            if(int(line[1])==i and int(line[2])==j):
                MDP[i][j].append(tuple(line[3:]))
                

FileNotFoundError: [Errno 2] No such file or directory: 'Week1/MDPS/mdp-2-2.txt'