This section introduces how experiments can be performed on various samplers.

In [1]:
import sys
sys.path.append('..')
import torch
import matplotlib
matplotlib.use('Agg')
import torch.nn as nn
import numpy as np
import os
import seaborn as sns
from rvi_sampling import utils
from rvi_sampling.samplers import ISSampler, ABCSampler, MCSampler
from rvi_sampling.distributions.proposal_distributions import FunnelProposal, SimonsSoftProposal

Roboschool not installed. 


In [2]:
# Notebook-level configuration constants
DIMENSIONS = 1    # Dimension of the random walk
OUTPUT_SIZE = 2   # Output dimension of the sampler networks (action, action probabilities)
BIASED = False    # Forwarded as `biased=` when the random walk is created

Command line parsers can be created using the `utils.parsers.create_parser` function. This adds the basic command line arguments for RVI sampling as well as the basic experimental arguments:
```
parser = utils.parsers.create_parser('1D random walk', 'random_walk')
```


Additional arguments required by an experiment can be added using the `parser.add_argument` function:
```
parser.add_argument('-cycles', '--cycles', type=int, default=15,
                    help='number of train-test cycles.')
```

The `parser.parse_args` function runs the parser on the command line arguments and returns the parsed parameters, which we store in a variable:
```
args = parser.parse_args()
```

In [4]:
# For the purpose of tutorial, the args variable is set manually

### rvi arguments

class Arguments():
    """Manual stand-in for the namespace returned by ``parser.parse_args()``.

    Every keyword argument is stored as an instance attribute of the same
    name, so the rest of the notebook can read ``args.<name>`` exactly as it
    would from the parsed command line.

    NOTE(review): later cells also read ``args.rw_seed``, ``args.method``,
    ``args.rw_width`` and ``args.endpoint``. No default for them is visible in
    this notebook, so they are not defined here; set them on the instance
    (e.g. ``args.method = 'MCSampler'``) before running those cells.
    """

    def __init__(
        self,
        entropy = 0,  # Rvi environment
        baseline_decay = 0.99,
        learning_rate = 0.001,
        baseline_learning_rate = 0.001,
        only_rvi = False,
        no_train = False,
        baseline_type = 'moving_average',
        notime = True,
        gamma = 1,
        rewardclip = -10,
        gae = False,
        lam = 1.0,
        n_agents = 1,
        plot_posterior = False,
        neural_network = [16, 16],
        pretrained = None,
        samples = 1000, # experimental arguments
        sampler_seed = 0,
        n_cpus = 3,
        no_tensorboard = False,
        name = 'results',
        IS_proposal = 'funnel',
        softness_coefficient = 1.0,
        override_endpoint = False,
        outfolder = './',
        profile_performance = False,
        cycles = 15,  # number of train-test cycles (same default as the parser example)
    ):
        # Rvi arguments
        self.entropy = entropy
        self.baseline_decay = baseline_decay
        self.learning_rate = learning_rate
        self.baseline_learning_rate = baseline_learning_rate
        self.only_rvi = only_rvi
        self.no_train = no_train
        self.baseline_type = baseline_type
        self.notime = notime
        self.gamma = gamma
        self.rewardclip = rewardclip
        self.gae = gae
        self.lam = lam
        self.n_agents = n_agents
        self.plot_posterior = plot_posterior
        # Copy so instances never share (or mutate) the default list object
        self.neural_network = list(neural_network)
        self.pretrained = pretrained

        # Experimental arguments
        self.samples = samples
        self.sampler_seed = sampler_seed
        self.n_cpus = n_cpus
        self.no_tensorboard = no_tensorboard
        self.name = name
        self.IS_proposal = IS_proposal
        self.softness_coefficient = softness_coefficient
        self.override_endpoint = override_endpoint
        self.outfolder = outfolder
        self.profile_performance = profile_performance
        self.cycles = cycles


# The following cells read their parameters from this `args` object
args = Arguments()

In [None]:
# This sets the global seed for the random number generators
utils.common.set_global_seeds(args.sampler_seed)
sns.set_style('whitegrid')

# Create the folder name for where the results are to be stored; it combines
# the run name, the sampler seed, the random walk seed and the sampler method
folder_name = utils.io.create_folder_name(args.outfolder, args.name+'_'+str(args.sampler_seed)+'_'+str(args.rw_seed)+'_'+str(args.method))

# Training results are stored in a separate training folder, with one numbered
# sub-folder per cycle ('0' is the initial one)
train_folder_name = os.path.join(folder_name, 'training_results')
train_folder_to_save_in = os.path.join(train_folder_name, '0')

# Create the folders required for saving results, parents before children, so
# this works whether or not create_folder creates intermediate directories
utils.io.create_folder(folder_name)
utils.io.create_folder(train_folder_name)
utils.io.create_folder(train_folder_to_save_in)

# This tracks the training kl divergence results, cumulatively and per cycle
kl_train_cumulative_track = os.path.join(folder_name, 'kl_training_cumulative.txt')
kl_train_track = os.path.join(folder_name, 'kl_training.txt')

# This tracks the proposal success rates, cumulatively and per cycle
prop_train_cumulative_track = os.path.join(folder_name, 'prop_training_cumulative.txt')
prop_train_track = os.path.join(folder_name, 'prop_training.txt')

In [None]:
# This function creates the random walk with the given parameters. It returns a
# pair: `rw`, the process the sampler is later run on, and `analytic`, which
# later cells query for the KL divergence and the expected starting position.
# The n_agents parameter sets how many agents are interacting with the random walk.
# Different stochastic processes can be implemented in a similar way to the random walk.
rw, analytic = utils.stochastic_processes.create_rw(args, biased=BIASED, n_agents=args.n_agents)

In [None]:
# This argument decides if we want to override the endpoint of the random walk process.
# When it is set, rw.xT is replaced by a one-element array holding args.endpoint.
# NOTE(review): `endpoint` is not among the parameters of the Arguments class in
# this notebook — confirm it is set on `args` before enabling override_endpoint.
if args.override_endpoint:
    rw.xT = np.array([ args.endpoint ])

In [None]:
# Record the walk's start (rw.x0) and end (rw.xT) positions in the results
# folder as files named 'start=<x0>' and 'end=<xT>'.
# NOTE(review): presumably utils.io.touch creates an empty file at the given path — confirm.
utils.io.touch(os.path.join(folder_name, 'start={}'.format(rw.x0)))
utils.io.touch(os.path.join(folder_name, 'end={}'.format(rw.xT)))

In [None]:
# Where the IS proposal should push toward: the pair [-rw_width, +rw_width]
push_toward = [-args.rw_width, args.rw_width]

# 'soft' selects the soft proposal, which makes the push lighter by an amount
# set through the softness coefficient; any other value selects the funnel proposal
proposal = (
    SimonsSoftProposal(push_toward, softness_coeff=args.softness_coefficient)
    if args.IS_proposal == 'soft'
    else FunnelProposal(push_toward)
)

# One zero-argument builder per supported method name, so that only the
# requested sampler is ever constructed
sampler_builders = {
    'ISSampler': lambda: ISSampler(proposal, seed=args.sampler_seed),
    'MCSampler': lambda: MCSampler(seed=args.sampler_seed),
    'ABCSampler': lambda: ABCSampler('slacked', seed=args.sampler_seed),
}

if args.method not in sampler_builders:
    raise ValueError('Unknown method')

sampler = sampler_builders[args.method]()

In [None]:
def kl_function(estimated_distribution):
    """Return ``analytic.kl_divergence`` for ``estimated_distribution``.

    The notebook-level random walk's endpoint ``rw.xT`` is passed as the
    second argument; both ``analytic`` and ``rw`` are read from the notebook
    namespace at call time.
    """
    endpoint = rw.xT
    return analytic.kl_divergence(estimated_distribution, endpoint)

In [None]:
# Attach a diagnostic to the sampler; it is built from the sampler's name, the
# arguments, the results folder and the KL function defined above
sampler.set_diagnostic(utils.diagnostics.create_diagnostic(sampler._name, args, folder_name, kl_function))

# Report the walk's true start and end, then the expectation given by the
# analytic solution for the first coordinate of the endpoint
print('True Starting Position is:{}'.format(rw.x0))
print('True Ending Position is: {}'.format(rw.xT))
print('Analytic Starting Position: {}'.format(analytic.expectation(rw.xT[0])))

# Accumulator for the results of all cycles; stays None until the first cycle has run
train_results = None

# Touch the four tracking files before the training loop stashes lines into them
# NOTE(review): presumably utils.io.touch creates the file if it is missing — confirm.
utils.io.touch(kl_train_track)
utils.io.touch(kl_train_cumulative_track)
utils.io.touch(prop_train_track)
utils.io.touch(prop_train_cumulative_track)

In [None]:
# Run args.cycles sampling cycles. Each cycle draws args.samples samples, folds
# them into the cumulative results and records the KL divergence and proposal
# success rate for both the cumulative results and this cycle alone.
# NOTE: train_results lives across cells, so re-running this cell without
# resetting it to None keeps accumulating on top of the previous run.
for i in range(1, args.cycles+1):
    train_results_new = sampler.solve(rw, args.samples)

    # technically doing this saving doesn't take too long so doesn't need to be run
    # in a background thread. This is good because it saves time of having to copy
    # the policy for saving etc.
    if train_results is None:
        # First cycle: the cumulative results are the very same object as this
        # cycle's results, and that object is extended in place from cycle 2 on.
        train_results = train_results_new
    else:
        # augment the old Results object.
        train_results._all_trajectories.extend(train_results_new.all_trajectories())
        train_results._trajectories.extend(train_results_new.trajectories())
        train_results._posterior_particles = np.hstack([train_results.posterior(),
                                                        train_results_new.posterior()])

        train_results._posterior_weights = np.hstack([train_results.posterior_weights(),
                                                      train_results_new.posterior_weights()])


    # Total number of samples requested so far; used as the first column of every tracking line
    steps_so_far = str(i * args.samples)


    train_folder_to_save_in = os.path.join(train_folder_name, str(i))
    utils.io.create_folder(train_folder_to_save_in)
    print('Training Phase:')
    # Cumulative results: None is passed in place of a save folder
    kld = utils.analysis.analyze_samplers_rw([train_results], args, None, rw,
                                       policy=None, analytic=analytic) # don't save these things again

    utils.io.stash(kl_train_cumulative_track, steps_so_far + ', ' + str(kld[0]))
    utils.io.stash(prop_train_cumulative_track, steps_so_far + ', ' + str(train_results.prop_success()))


    # This cycle only: the per-cycle folder created above is passed as the save folder
    kld = utils.analysis.analyze_samplers_rw([train_results_new], args, train_folder_to_save_in, rw,
                                       policy=None, analytic=analytic) # unlike the cumulative call, this one is given a folder to save into
    utils.io.stash(kl_train_track, steps_so_far + ', ' + str(kld[0]))
    utils.io.stash(prop_train_track, steps_so_far + ', ' + str(train_results_new.prop_success()))

We now look at how different samplers behave

# Monte Carlo Sampler

Monte Carlo Sampler takes a random direction at each step of the random walk. For random walks with endpoints near the start region MC Samplers work well.

For endpoint=0, the trajectories obtained from the sampler will be

![alt text](img/successful_trajectories_mc_end0.jpg)

For endpoint=8, the trajectories will be

![no_img](img/successful_trajectories_mc_end8.jpg)

# Importance Sampling Sampler

endpoint=0

![no_img](img/successful_trajectories_is_end0.jpg)

endpoint=8

![no_img](img/successful_trajectories_is_end8.jpg)

# Importance Sampling with Soft Proposal

endpoint=0

![no_img](img/successful_trajectories_issoft_end0.jpg)

endpoint=8

![no_img](img/successful_trajectories_issoft_end8.jpg)

# RVI Sampler

### Initial Stages of Training

TODO

Now we look at how different samplers (MCSampler, ISSampler with a handmade proposal, ISSampler with a soft proposal) behave in different conditions. The endpoint of the process is changed to reflect different difficulty conditions. Endpoints farther from the starting position require low-probability trajectories to be successful. The Monte Carlo sampler performs poorly in these adverse conditions.

Comparisons between samplers on random walk of varying difficulties

![no_img](img/difficulty_comparissons.jpg)