# Task 2 - Markov Decision Process


In [2]:
import numpy as np
import WindProcess
import PriceProcess
from data import get_fixed_data

class HydrogenMDP:
    """Monte Carlo evaluator for a policy on the hydrogen-storage problem.

    Problem parameters are read once from ``get_fixed_data()``. A policy is
    a callable ``policy(state, t)`` where ``state`` is the tuple
    ``(storage_level, wind_sample, price_sample)`` and the return value is a
    4-tuple ``(x_t, p2h_t, h2p_t, g_t)``.
    """

    def __init__(self, policy, episodes=10):
        """Store the policy and episode count, and load the fixed data.

        Args:
            policy: Callable ``(state, t) -> (x_t, p2h_t, h2p_t, g_t)``.
            episodes: Number of episodes averaged by ``simulate``.
        """
        self.policy = policy  # Policy function: (s, t) -> action
        self.episodes = episodes
        self.data = get_fixed_data()
        self.T = self.data['num_timeslots']

    def simulate(self):
        """Run ``self.episodes`` episodes and return the mean total reward."""
        episode_returns = [self.run_episode() for _ in range(self.episodes)]
        return np.mean(episode_returns)

    def run_episode(self):
        """Simulate one episode of ``self.T`` steps and return its reward.

        Wind and price paths are sampled up front as independent normal
        draws (std 1 around ``target_mean_wind`` and std 5 around
        ``mean_price``). The per-step reward is the negative of
        ``price * g_t + electrolyzer_cost * x_t``.

        NOTE(review): the wind sample is only handed to the policy; neither
        the storage update nor the cost below reads it, and nothing here
        checks the action against demand.
        """
        params = self.data
        horizon = self.T

        # Wind is drawn before price; keep this order so that results under
        # a fixed numpy seed do not change.
        wind_path = np.random.normal(params['target_mean_wind'], 1, horizon)
        price_path = np.random.normal(params['mean_price'], 5, horizon)

        storage = 0  # Hydrogen storage starts empty
        episode_return = 0

        for t, (wind, price) in enumerate(zip(wind_path, price_path)):
            x_t, p2h_t, h2p_t, g_t = self.policy((storage, wind, price), t)

            # Storage update: add converted p2h input, subtract the h2p
            # draw, then clamp to the range [0, hydrogen_capacity].
            charged = params['conversion_p2h'] * p2h_t
            discharged = h2p_t / params['conversion_h2p']
            unclamped = storage + charged - discharged
            storage = min(max(unclamped, 0), params['hydrogen_capacity'])

            # Step cost: priced g_t term plus the electrolyzer term on x_t.
            grid_cost = price * g_t
            electrolyzer_cost = params['electrolyzer_cost'] * x_t
            episode_return -= grid_cost + electrolyzer_cost  # reward = -cost

        return episode_return

# Dummy policy: never use the electrolyzer; cover any shortfall from the grid
def dummy_policy(state, t):
    """Return an action whose first three components are zero.

    Args:
        state: Tuple ``(s_h, p_wind, lambda_grid)``; only ``p_wind`` is read.
        t: Index into ``demand_schedule`` from ``get_fixed_data()``.

    Returns:
        ``(0, 0, 0, g)`` where ``g`` is the demand at ``t`` minus the wind
        value, floored at zero.
    """
    _, p_wind, _ = state
    demand_t = get_fixed_data()['demand_schedule'][t]
    shortfall = demand_t - p_wind
    return (0, 0, 0, max(0, shortfall))

# Run simulation: evaluate the dummy policy over 100 episodes.
# simulate() returns the mean of the per-episode total rewards; rewards are
# accumulated as negative costs, so a value closer to zero means lower cost.
mdp = HydrogenMDP(dummy_policy, episodes=100)
average_performance = mdp.simulate()
print("Average performance of dummy policy:", average_performance)

Average performance of dummy policy: -845.5668737655192
