# Managing Randomness in Experiments

In [1]:
import os
import torch
import random
import numpy as np

import itertools
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn import svm
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import zero_one_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from dataclasses import dataclass

# Holding Random State in a Class

I typically write a class that manages the various random states used in my experiments.

In [2]:
@dataclass
class Seeds:
    """Seed values for each source of randomness managed by SeedControl.

    Every field defaults to 0.
    """

    pythonhash: int = 0  # written to the PYTHONHASHSEED environment variable
    pythonrand: int = 0  # passed to random.seed
    numpy: int = 0  # passed to np.random.seed
    torch: int = 0  # passed to torch.manual_seed


class SeedControl:
    """Manage random seeds.

    Holds a ``Seeds`` instance in ``self.s`` and applies it to Python's
    ``random`` module, NumPy's global generator, PyTorch and the
    ``PYTHONHASHSEED`` environment variable whenever it is set or updated.
    """

    def __init__(self, seeds=None):
        """Store ``seeds`` and apply them immediately.

        Args:
            seeds: A ``Seeds`` instance to use. When omitted (or ``None``) a
                fresh ``Seeds()`` with all-zero defaults is created.
        """
        # A `seeds=Seeds()` default would be evaluated once at definition time
        # and shared by every default-constructed SeedControl, so mutating
        # `self.s` on one instance would silently change the "default" seeds
        # of all later ones. Build a new object per instance instead.
        self.s = Seeds() if seeds is None else seeds
        self._set_seeds()

    def update_all_seeds(self, seed: int):
        """Fix all seeds to the same seed and apply them."""
        self.s = Seeds(
            pythonhash=seed,
            pythonrand=seed,
            numpy=seed,
            torch=seed
        )

        self._set_seeds()

    def _set_seeds(self):
        """Set the random seeds in the environment."""
        # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
        # assignment does not change str/bytes hashing in the already-running
        # process; it is only inherited by subprocesses started afterwards.
        os.environ['PYTHONHASHSEED'] = str(self.s.pythonhash)
        random.seed(self.s.pythonrand)
        np.random.seed(self.s.numpy)
        torch.manual_seed(self.s.torch)

    def get_seeds(self):
        """Return the currently stored seed values as a plain dict."""
        return {
            'PythonHash': self.s.pythonhash,
            'PythonRand': self.s.pythonrand,
            'Numpy': self.s.numpy,
            'Torch': self.s.torch
        }

In [3]:
def print_random_states():
    """Print the seed-related state of NumPy, PyTorch and the environment.

    Reports three values:
      * the first word of the key array returned by ``np.random.get_state()``
        (in this notebook's runs it equals the seed right after seeding),
      * ``torch.random.initial_seed()``,
      * the ``PYTHONHASHSEED`` environment variable, or ``<not set>`` when the
        variable has not been exported.
    """
    # get_state() returns a tuple; index 1 is the key array, [0] its first word.
    numpy_state = np.random.get_state()[1][0]
    torch_state = torch.random.initial_seed()
    # Plain indexing raised KeyError when the variable had never been set
    # (e.g. when this runs before any SeedControl is created).
    python_hash_state = os.environ.get('PYTHONHASHSEED', '<not set>')

    print(f"Numpy's random state: {numpy_state}")
    print(f"Pytorch's random state: {torch_state}")
    print(f"Python's environment state: {python_hash_state}")

In [4]:
# Default construction: every seed is 0 and is applied immediately by __init__.
seed_control = SeedControl()

In [5]:
# Report the seed values currently held by seed_control.
seed_control.get_seeds()

{'PythonHash': 0, 'PythonRand': 0, 'Numpy': 0, 'Torch': 0}

In [6]:
# Show the NumPy, PyTorch and PYTHONHASHSEED values now in effect.
print_random_states()

Numpy's random state: 0
Pytorch's random state: 0
Python's environment state: 0


In [7]:
# Switch every seed to 10 and re-apply them in one call.
seed_control.update_all_seeds(10)

In [8]:
# Confirm that all stored seeds were replaced.
seed_control.get_seeds()

{'PythonHash': 10, 'PythonRand': 10, 'Numpy': 10, 'Torch': 10}

In [9]:
# Check that the libraries and the environment variable reflect the new seed.
print_random_states()

Numpy's random state: 10
Pytorch's random state: 10
Python's environment state: 10


## A Note on Setting Seeds

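Setting a seed does not mean that every subsequent call that requires randomness will return the same value! The seed fixes the starting state of a pseudorandom number generator, so the *sequence* of numbers it produces is reproducible, while successive draws within that sequence still differ. Check this out: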

In [14]:
# Start from a known state: a new SeedControl re-applies the default seeds (0).
seed_control = SeedControl()

In [15]:
# Successive draws from the same seeded generator differ from one another.
print('1st random number = ', random.random())
print('2nd random number = ', random.random())
print('1st random int = ', random.randint(1, 100))
print('2nd random int = ', random.randint(1, 100))

1st random number =  0.8444218515250481
2nd random number =  0.7579544029403025
1st random int =  54
2nd random int =  6


In [16]:
# Reset the seeds back to their starting values by constructing a new
# SeedControl, which re-applies the default seeds.
seed_control = SeedControl()

In [17]:
# With the seed re-applied, these draws repeat the sequence printed before the reset.
print('\n3rd random number = ', random.random())
print('4th random number = ', random.random())
print('3rd random int = ', random.randint(1, 100))
print('4th random int = ', random.randint(1, 100))


3rd random number =  0.8444218515250481
4th random number =  0.7579544029403025
3rd random int =  54
4th random int =  6


# Random Variation in our Data