In [1]:

using POMDPs, QuickPOMDPs, MCTS, DiscreteValueIteration, POMDPSimulators, POMDPModels, POMDPPolicies, POMDPModelTools
using Distributions, Combinatorics, StaticArrays, Statistics
using FileIO, JLD2, TickTock

## State Functions

In [2]:
# Number of distinct state vectors: ways to split `n` units among `S` states,
# i.e. the stars-and-bars count C(n + S - 1, S - 1).
# n = number of units; S = number of states
function state_cnt(n, S)
    # With a single state every unit must sit in it, so there is exactly one vector.
    return S == 1 ? 1 : binomial(n + S - 1, S - 1)
end

# Return the 1-based index of state vector `s` in the enumeration used by `state_vec`.
#
# S = number of states (S = length(s)); s = state vector, s[i] = number of units in state i.
#
# States are ordered by their last component first: all vectors with s[S] = 0 come first,
# then those with s[S] = 1, and so on. Within a group the order is given recursively by
# the first S-1 components. All counts are computed with exact integer arithmetic, so the
# result is always an integer (the previous floating-point recurrence returned a Float64
# whenever s[S] >= 2, which cannot be used directly as an array index).
function state_index(S, s)
    # A single state admits exactly one vector, which therefore has index 1.
    if S == 1
        return 1
    end

    n_units = sum(s)
    # Skip every group whose last component is smaller than s[S]. The group with last
    # component `used` contains state_cnt(n_units - used, S - 1) vectors.
    offset = 0
    for used in 0:(s[S] - 1)
        offset += state_cnt(n_units - used, S - 1)
    end
    # Position inside the group is the index of the remaining S-1 components.
    return offset + state_index(S - 1, s[1:(S - 1)])
end

# Return the state vector (length `S`, entries summing to `n`) stored at position `ind`
# of the enumeration; this is the inverse of `state_index`.
#
# n = number of units; S = number of states; ind = state index (1-based).
#
# `ind` is compared with a ±0.5 tolerance so that indices held as floating-point numbers
# are still accepted. If `ind` lies outside 1:state_cnt(n, S), a message is printed and
# the sentinel -1 is returned instead of a vector (callers must check for it).
#
# Group sizes are taken from `state_cnt`, so all counting is exact integer arithmetic and
# the local counters keep a single type throughout the loop.
function state_vec(n, S, ind)
    if ind < 0.5 || ind > state_cnt(n, S) + 0.5
        println("index is out of range!")
        return -1
    end
    # With one state, all n units are in it.
    if S == 1
        return [n]
    end

    # Vectors are grouped by their last component (0, 1, 2, ...). Walk over the groups
    # until the one containing `ind` is found.
    last_state = 0
    inc = state_cnt(n, S - 1)   # size of the group with last component == last_state
    prev = inc                  # number of vectors with last component <= last_state
    while ind > prev + 0.5
        last_state += 1
        inc = state_cnt(n - last_state, S - 1)
        prev += inc
    end
    # `ind - prev + inc` is the position of `ind` inside its group; the remaining
    # n - last_state units are distributed over the first S-1 states.
    return push!(state_vec(n - last_state, S - 1, ind - prev + inc), last_state)
end

# Using a heuristic rule for rollout.
# Rollout policy built on a two-parameter (n, N) threshold rule. The fields are mutable
# because the matching `POMDPs.action` method overwrites both of them with freshly sampled
# levels on every call.
mutable struct nNRollout <: Policy
    n::Int64  # level returned as the action when the rule fires
    N::Int64  # level threshold: the rule fires when at least one unit is at level N or above
end

# Rollout policy built on a three-parameter (n, m, N) threshold rule. The fields are
# mutable because the matching `POMDPs.action` method overwrites all of them with freshly
# sampled levels on every call.
mutable struct nmNRollout <: Policy
    n::Int64  # level returned as the action when the rule fires
    m::Int64  # secondary threshold: the rule fires when at least two units are at level m or above
    N::Int64  # primary threshold: the rule fires when at least one unit is at level N or above
end

# State of the multi-unit system as used by the MDP generative model:
# state[i] is the number of units currently at degradation level i.
mutable struct mystate
    state::Vector{Int64};
end

In [3]:
# Randomised (n, N) rollout rule: draw two levels with replacement from 1:Number_level,
# store the larger one as the threshold N and the smaller one as n, then return n if at
# least one unit is at level N or above; otherwise return 0 (do nothing).
function POMDPs.action(p::nNRollout, s::mystate)
    unit_counts = s.state
    draws = sample(1:Number_level, 2, replace = true)
    p.n, p.N = extrema(draws)
    return sum(unit_counts[p.N:Number_level]) >= 1 ? p.n : 0
end

# Randomised (n, m, N) rollout rule: draw three levels with replacement from
# 1:Number_level and store them, in increasing order, as n, m and N. Return n when at
# least one unit is at level N or above, or at least two units are at level m or above;
# otherwise return 0 (do nothing).
function POMDPs.action(p::nmNRollout, s::mystate)
    unit_counts = s.state
    draws = sample(1:Number_level, 3, replace = true)
    # The sorted draws are (minimum, middle, maximum).
    p.n, p.m, p.N = sort(draws)
    one_at_N = sum(unit_counts[p.N:Number_level]) >= 1
    if one_at_N || sum(unit_counts[p.m:Number_level]) >= 2
        return p.n
    end
    return 0
end

In [4]:
# Evaluate every (n, N) threshold policy with 2 <= N <= Number_level and 1 <= n <= N by
# simulation and report the best one.
#
# Reads the globals multiunit2, simsteps, repetition, NumberUnits and Number_level, and
# writes into the global buffers temp_rewards, rewards_nN and rewards_nN_std.
#
# Returns (best mean discounted reward, its standard deviation, CartesianIndex (n, N)).
function findNn()
    println("Finding best nN policy...")
    # Every simulation starts from the state with index 1.
    start_state() = mystate(state_vec(NumberUnits, Number_level, 1))
    for N in 2:Number_level, n in 1:N
        # Threshold rule: choose action n as soon as any unit is at level N or above.
        threshold_policy = s -> (sum(s.state[N:Number_level]) >= 1 ? n : 0)
        # Each repetition writes its own slot of temp_rewards.
        Threads.@threads for j in 1:repetition
            history = sim(
                threshold_policy,
                multiunit2;
                max_steps = simsteps,
                initialstate = start_state(),
            )
            temp_rewards[j] = discounted_reward(history)
        end
        rewards_nN[n, N] = mean(temp_rewards)
        rewards_nN_std[n, N] = std(temp_rewards)
    end
    best_reward, best_idx = findmax(rewards_nN)
    println(
        "Max rewards of (N, n) policy is ", best_reward,
        "  n is ", best_idx[1], "  N is ", best_idx[2],
    )
    return best_reward, rewards_nN_std[best_idx[1], best_idx[2]], best_idx
end


findNn (generic function with 1 method)

In [5]:
using DataFrames

# Result table: one row per (number of units, cost setting) with the best (n, N) policy.
# u = number of units; n, s, m, f = normal-operation reward, setup cost, maintenance
# penalty and failure penalty; mean / std = statistics of the discounted reward returned
# by `findNn`; nN = CartesianIndex (n, N) of the best policy.
df = DataFrame(u=[], n=[], s=[], m=[], f=[], mean=[], nN=[], std=[])

# Each entry is [normal_operation setup_cost maintenance_penalty failure_penalty].
cost = [
    [0 -200 -200 -1000]
]

# Settings that do not depend on the number of units are computed once, outside the loop.
Number_level = 10
fullname = "tm10"
Transition_matrix = load(fullname * ".jld2", "transition_matrix")
# crd[i] is the distribution of the next degradation level of a unit currently at level i
# (row i of the transition matrix).
crd = [Categorical(Transition_matrix[i, :]) for i in 1:Number_level]

for units in [20, 30]
    global NumberUnits = convert(Int64, units)
    global state_number = state_cnt(NumberUnits, Number_level)

    global multiunit2 = QuickMDP(
        # s.state[i] is the number of units at level i. The action a is a level threshold:
        # a != 0 resets every unit at level a or above, a == 0 resets only the units at
        # the last level (Number_level).
        gen = function (s, a, rng)
            local_s = s.state
            # Expand the per-level counts into one entry per unit, sorted by level, so the
            # units to reset are always the last `number_reset` entries.
            degradation_state = ones(Int, NumberUnits)
            k = 1
            for i in 1:Number_level
                for j in 1:local_s[i]
                    degradation_state[k] = i
                    k += 1
                end
            end

            number_reset = a != 0 ? sum(local_s[a:Number_level]) : local_s[Number_level]

            r = 0.0
            # Units that are not reset keep operating and degrade by one transition.
            # All sampling uses the `rng` supplied by the simulator so runs are reproducible.
            for i in 1:(NumberUnits - number_reset)
                degradation_state[i] = rand(rng, crd[degradation_state[i]])
                r += normal_operation
            end

            # Units that are reset: a unit at the last level incurs the failure penalty,
            # any other unit the maintenance penalty. The setup cost is charged once per
            # step, together with the first reset unit.
            setup_charged = false
            for i in (NumberUnits - number_reset + 1):NumberUnits
                r += degradation_state[i] == Number_level ? failure_penalty : maintenance_penalty
                if !setup_charged
                    r += setup_cost
                    setup_charged = true
                end
                # Reset to level 1, then apply one transition from level 1.
                degradation_state[i] = rand(rng, crd[1])
                r += normal_operation  # operation benefit
            end

            # Collapse the per-unit levels back into per-level counts.
            sp = zeros(Int, Number_level)
            for level in degradation_state
                sp[level] += 1
            end
            return (sp = mystate(sp), r = r)
        end,
        actions = 0:(Number_level-1),
        actiontype = function()
            return Int64
        end,
        # Start from the state with index 1. An ImplicitDistribution is needed so that
        # the simulators can sample the initial state.
        initialstate = function()
            POMDPModelTools.ImplicitDistribution() do rng
                return mystate(state_vec(NumberUnits, Number_level, 1))
            end
        end,
        discount = 0.95,
        isterminal = false,  # no ending
    )

    for c in cost
        print(units)
        tick()
        global normal_operation, setup_cost, maintenance_penalty, failure_penalty = c
        println([normal_operation, setup_cost, maintenance_penalty, failure_penalty])
        global simsteps = 100
        global repetition = 10000
        # Entries that `findNn` never fills keep a very low sentinel so `findmax` ignores them.
        global rewards_nN = fill(-100000000.0, Number_level, Number_level)
        global rewards_nN_std = fill(-100000000.0, Number_level, Number_level)
        global discount_factor = 0.95
        global temp_rewards = zeros(repetition, 1)
        # Named `reward_std` rather than `std` to avoid shadowing Statistics.std, which
        # `findNn` calls.
        rewards, reward_std, nN = findNn()
        pushfirst!(
            df,
            [units, normal_operation, setup_cost, maintenance_penalty, failure_penalty,
             rewards, nN, reward_std],
        )
        tock()
    end
end

20

┌ Info:  started timer at: 2023-04-17T10:06:19.747
└ @ TickTock C:\Users\vbansal5\.julia\packages\TickTock\fGILW\src\TickTock.jl:54


[0, -200, -200, -1000]
Finding best nN policy...
Max rewards of (N, n) policy is -27416.28425094323  n is 7  N is 9
30[0, -200, -200, -1000]
Finding best nN policy...


┌ Info:          192.9798647s: 3 minutes, 12 seconds, 979 milliseconds
└ @ TickTock C:\Users\vbansal5\.julia\packages\TickTock\fGILW\src\TickTock.jl:62
┌ Info:  started timer at: 2023-04-17T10:09:32.860
└ @ TickTock C:\Users\vbansal5\.julia\packages\TickTock\fGILW\src\TickTock.jl:54


Max rewards of (N, n) policy is -39634.786021275744  n is 8  N is 9


┌ Info:          248.3026984s: 4 minutes, 8 seconds, 302 milliseconds
└ @ TickTock C:\Users\vbansal5\.julia\packages\TickTock\fGILW\src\TickTock.jl:62


In [6]:
df

Row,u,n,s,m,f,mean,nN,std
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any
1,30,0,-200,-200,-1000,-39634.8,"CartesianIndex(8, 9)",2238.23
2,20,0,-200,-200,-1000,-27416.3,"CartesianIndex(7, 9)",1668.53


In [7]:
using CSV
# Save the summary table of best (n, N) policies as "nN_new.csv" in the working directory.
CSV.write("nN_new.csv", df)

"nN_new.csv"