In [1]:
import gym
import gym_dssat_pdi
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import glob

In [2]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so that
# the load order (and the key order of `dfs`) is reproducible between runs.
fnames = sorted(glob.glob("eval*.txt"))

In [3]:
# Parse every evaluation file into a DataFrame keyed by its file name.
# As implied by the parsing below, each non-empty line has the form
# "<column label>: [v1, v2, ...]" and becomes one float column.
dfs = {}
for fname in fnames:
    with open(fname, 'r') as f:
        raw_text = f.read()

    values_by_label = {}
    for line in raw_text.strip().split("\n"):
        if not line.strip():
            # Tolerate blank lines (and completely empty files) instead of
            # failing with an IndexError on the missing value part.
            continue
        # Split on the first ':' only: everything before it is the label,
        # everything after it is the bracketed, comma-separated value list.
        label, _, value_text = line.partition(':')
        value_text = value_text.replace(' ', '').replace('[', '').replace(']', '').strip()
        values_by_label[label.strip()] = [float(x) for x in value_text.split(',')]

    dfs[fname] = pd.DataFrame(values_by_label)

In [4]:
# Summary statistics (count, mean, std, quartiles, ...) for every column of one loaded file.
# NOTE(review): in the recorded output the "A2C Agent" column matches "Null Agent" and
# "DDPG Agent" matches "TD3 Agent" in every statistic — worth checking how those
# results were produced before drawing conclusions from them.
dfs['eval_output_cotton_True_all_123.txt'].describe()

Unnamed: 0,Null Agent,Expert Agent,PPO Agent,A2C Agent,DDPG Agent,TD3 Agent,SAC Agent
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,2253.889917,3912.057383,2891.311433,2253.889917,-7919.564761,-7919.564761,4428.500671
std,392.639243,1776.285213,472.888459,392.639243,2675.743113,2675.743113,2121.11637
min,1356.660939,1350.649891,1865.380959,1356.660939,-13726.692526,-13726.692526,-1934.556005
25%,1952.744966,2235.837298,2509.480403,1952.744966,-9881.046124,-9881.046124,3037.388995
50%,2251.545415,3869.72819,2881.411294,2251.545415,-7807.065291,-7807.065291,5184.09286
75%,2545.412841,5039.1546,3244.000219,2545.412841,-5908.974436,-5908.974436,6203.288115
max,3294.720838,8900.127178,3985.31224,3294.720838,-2894.025578,-2894.025578,7217.932737


In [5]:
# List the file names that were matched and loaded into `dfs`.
dfs.keys()

dict_keys(['eval_output_maize_False_all_123.txt', 'eval_output_cotton_False_all_123.txt', 'eval_output_rice_True_all_123.txt', 'eval_output.txt', 'eval_output_maize_True_all_123.txt', 'eval_output_cotton_True_all_123.txt', 'eval_output_rice_False_all_123.txt'])

In [59]:
import gym
import gym_dssat_pdi
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

# helpers for action normalization
def normalize_action(action_space_limits, action):
    """Map ``action`` linearly from the interval [low, high] onto [-1, 1].

    ``action_space_limits`` is a ``(low, high)`` pair: ``low`` maps to -1 and
    ``high`` maps to 1.
    """
    lower, upper = action_space_limits
    # Position of the action inside the interval, as a fraction in [0, 1].
    fraction = (action - lower) / (upper - lower)
    return 2.0 * fraction - 1.0

def denormalize_action(action_space_limits, action):
    """Map ``action`` linearly from [-1, 1] back onto the interval [low, high].

    ``action_space_limits`` is a ``(low, high)`` pair: -1 maps to ``low`` and
    1 maps to ``high``.
    """
    lower, upper = action_space_limits
    # Shift [-1, 1] to [0, 1] before scaling by the width of the interval.
    unit_fraction = 0.5 * (action + 1.0)
    return lower + unit_fraction * (upper - lower)

# Wrapper for easy and uniform interfacing with SB3
class GymDssatWrapper(gym.Wrapper):
    """Adapter exposing a gym-dssat environment through a flat, SB3-friendly interface.

    * Actions are a single float in [-1, 1]; ``step`` rescales them to the bounds
      of the wrapped environment's ``'anfer'`` action before forwarding them.
    * Observations are converted from dicts to arrays with the wrapped
      environment's ``observation_dict_to_array``.
    * On the terminal step the last non-terminal observation and info are
      returned together with a reward of 0.
    """

    def __init__(self, env):
        super(GymDssatWrapper, self).__init__(env)

        self.action_low, self.action_high = self._get_action_space_bounds()

        # using a normalized action space
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype="float32")

        # using a vector representation of observations to allow
        # easily using SB3 MlpPolicy
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=np.inf,
                                                shape=env.observation_dict_to_array(
                                                    env.observation).shape,
                                                dtype="float32"
                                                )

        # to avoid annoying problem with Monitor when episodes end and things are None
        self.last_info = {}
        self.last_obs = None

    def _get_action_space_bounds(self):
        """Return the ``(low, high)`` bounds of the wrapped env's ``'anfer'`` action."""
        box = self.env.action_space['anfer']
        return box.low, box.high

    def _format_action(self, action):
        """Wrap the first component of ``action`` in the dict the wrapped env expects."""
        return {'anfer': action[0]}

    def _format_observation(self, observation):
        """Convert an observation dict into its array representation."""
        return self.env.observation_dict_to_array(observation)

    def reset(self):
        """Reset the wrapped environment and return the initial observation as an array."""
        obs = self.env.reset()

        # Prime the terminal-step fallbacks with this episode's data. Without this,
        # an episode ending on its first step would hand back the previous
        # episode's observation (or ``None`` before any step has been taken).
        self.last_obs = obs
        self.last_info = {}

        return self._format_observation(obs)

    def step(self, action):
        """Apply a normalized action and return ``(observation, reward, done, info)``.

        ``action`` is expected in [-1, 1]; its first component is rescaled to the
        wrapped environment's ``'anfer'`` bounds.
        """
        # Rescale action from [-1, 1] to original action space interval
        denormalized_action = denormalize_action((self.action_low, self.action_high), action)
        formatted_action = self._format_action(denormalized_action)
        obs, reward, done, info = self.env.step(formatted_action)

        # handle `None`s in obs, reward, and info on done step
        if done:
            obs, reward, info = self.last_obs, 0, self.last_info
        else:
            self.last_obs = obs
            self.last_info = info

        formatted_observation = self._format_observation(obs)
        return formatted_observation, reward, done, info

    def close(self):
        """Close the wrapped environment and return whatever its ``close`` returns."""
        return self.env.close()

    def seed(self, seed):
        """Forward ``seed`` to the wrapped environment's ``set_seed``."""
        self.env.set_seed(seed)

    def __del__(self):
        # Make sure the wrapped environment is closed when the wrapper is collected.
        self.close()

# Create the module-level environment (fertilization mode, seed 123, random weather).
# This `env` instance is the one later handed to the baseline agents.
env_args = {
    'mode': 'fertilization',
    'seed': 123,
    'random_weather': True,
}

env = GymDssatWrapper(gym.make('GymDssatPdi-v0', **env_args))

# Training arguments for PPO agent
# NOTE(review): `ppo_args` is not used anywhere in the visible part of this
# notebook — presumably it is meant to be passed to `PPO(...)`; confirm or remove.
ppo_args = {
    'gamma': 1,
    'learning_rate': 0.0003,
    'seed': 123,
}


# Baseline agents for comparison
class NullAgent:
    """Baseline agent that never fertilizes: it always proposes the raw action 0."""

    def __init__(self, env):
        # The environment is only consulted for its action bounds.
        self.env = env

    def predict(self, obs, state=None, episode_start=None, deterministic=None):
        """Return the normalized "no fertilization" action and echo ``obs`` back.

        ``state``, ``episode_start`` and ``deterministic`` are accepted but not used.
        """
        bounds = (self.env.action_low, self.env.action_high)
        # Express the raw amount 0 in the normalized action space.
        zero_action = normalize_action(bounds, [0])
        batched_action = np.array([zero_action], dtype=np.float32)
        return batched_action, obs


class ExpertAgent:
    """
    Simple agent using policy of choosing fertilization amount based on days after planting

    The schedule maps a day-after-planting value to the raw fertilization amount
    applied on that day; on every other day the agent applies 0.
    """
    # Default schedule, used unless a custom one is passed to the constructor.
    fertilization_dic = {
        40: 27,
        45: 35,
        80: 54,
    }

    def __init__(self, env, normalize_action=False, fertilization_dic=None):
        """
        :param env: environment exposing ``action_low`` / ``action_high``
        :param normalize_action: stored on the instance but not consulted;
            ``predict`` always normalizes the action
        :param fertilization_dic: optional custom schedule replacing the class default
        """
        self.env = env
        # NOTE(review): kept for backward compatibility only. Honouring the default
        # (False) would stop normalizing and change the agent's behaviour.
        self.normalize_action = normalize_action
        if fertilization_dic is not None:
            # Copy so that later changes to the caller's dict do not alter the policy.
            self.fertilization_dic = dict(fertilization_dic)

    def _policy(self, obs):
        """Return ``[amount]`` scheduled for the day encoded in ``obs[0][1]``."""
        # assumes obs is batched and index 1 holds 'dap' (second key in the recorded
        # `env.observation` output) — TODO confirm against observation_dict_to_array
        dap = int(obs[0][1])
        return [self.fertilization_dic.get(dap, 0)]

    def predict(self, obs, state=None, episode_start=None, deterministic=None):
        """Return the normalized scheduled action and echo ``obs`` back.

        ``state``, ``episode_start`` and ``deterministic`` are accepted but not used.
        """
        action = self._policy(obs)
        action = normalize_action((self.env.action_low, self.env.action_high), action)
        return np.array([action], dtype=np.float32), obs


# evaluation and plotting functions
def evaluate(agent, n_episodes=10):
    """Run ``agent`` for ``n_episodes`` episodes on a fresh evaluation environment.

    The evaluation environment is built with seed 456 and is independent of the
    module-level ``env``.

    :param agent: object exposing a ``predict`` method
    :param n_episodes: number of evaluation episodes
    :return: the per-episode rewards reported by ``evaluate_policy``
    """
    # Create eval env
    eval_args = {
        'mode': 'fertilization',
        'seed': 456,
        'random_weather': True,
    }
    env = Monitor(GymDssatWrapper(gym.make('GymDssatPdi-v0', **eval_args)))

    try:
        returns, _ = evaluate_policy(
            agent, env, n_eval_episodes=n_episodes, return_episode_rewards=True)
    finally:
        # Always release the environment, even when the evaluation raises.
        env.close()

    return returns

def plot_results(labels, returns):
    """Draw one box per policy, save the figure as 'results_sb3.pdf' and show it.

    ``labels`` and ``returns`` are paired positionally: each label names the
    column holding the corresponding entry of ``returns``.
    """
    frame = pd.DataFrame(dict(zip(labels, returns)))

    axes = sns.boxplot(data=frame)
    axes.set_xlabel("policy")
    axes.set_ylabel("evaluation output")

    plt.savefig('results_sb3.pdf')
    print("\nThe result is saved in the current working directory as 'results_sb3.pdf'\n")
    plt.show()

# Evaluate the Null (no-fertilization) baseline for 5 episodes.
# NOTE(review): the recorded run of this cell stops with
# `ValueError: too many values to unpack (expected 2)`; the traceback is not
# preserved here, so the failing call still has to be identified.
null_agent = NullAgent(env)
print('Evaluating Null agent...')
null_returns = evaluate(null_agent,n_episodes=5)

Evaluating Null agent...


ValueError: too many values to unpack (expected 2)

In [63]:
# Debug inspection: show the environment's current observation dict.
# NOTE(review): the recorded output has 'dap': 4377089 while every other field is 0,
# which looks like an uninitialized / pre-reset state — confirm.
env.observation

{'cumsumfert': 0.0,
 'dap': 4377089,
 'dtt': 0.0,
 'ep': 0.0,
 'grnwt': 0.0,
 'istage': 7,
 'nstres': 0.0,
 'swfac': 0.0,
 'topwt': 0.0,
 'vstage': 0.0,
 'xlai': 0.0}

In [64]:
# Debug inspection: show the environment's history container
# (the recorded output has empty 'observation', 'action' and 'reward' lists).
env.history

{'observation': [], 'action': [], 'reward': []}