In [113]:
import gym
import numpy as np
import librosa

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [123]:
class DQN(nn.Module):
    '''Fully connected Q-network over a step sequence.

    The input has ``n_steps`` features and the output has ``n_steps`` values
    (one per step). The hidden widths are ``2 * n_steps``, ``4 * n_steps`` and
    ``2 * n_steps``; each hidden ``Linear`` layer is followed by
    ``BatchNorm1d`` and ``ReLU``.

    Note: because of the ``BatchNorm1d`` layers, a forward pass in training
    mode needs more than one sample per batch. Call ``.eval()`` before
    evaluating a single state.
    '''

    def __init__(self, n_steps):
        '''Build the network.

        Args:
            n_steps: number of steps in the sequence; used as both the input
                and the output width.
        '''
        super().__init__()

        self.model = nn.Sequential(
            nn.Linear(n_steps, n_steps * 2),
            nn.BatchNorm1d(n_steps * 2),
            nn.ReLU(),

            nn.Linear(n_steps * 2, n_steps * 4),
            nn.BatchNorm1d(n_steps * 4),
            nn.ReLU(),

            nn.Linear(n_steps * 4, n_steps * 2),
            nn.BatchNorm1d(n_steps * 2),
            nn.ReLU()
        )

        self.head = nn.Linear(n_steps * 2, n_steps)

    def forward(self, x):
        '''Run the network on a batch of states or on a single state.

        Args:
            x: tensor of shape ``(batch, n_steps)`` or ``(n_steps,)``.
                Integer/bool tensors are cast to the dtype of the network
                weights.

        Returns:
            Tensor of shape ``(batch, n_steps)``, or ``(n_steps,)`` when the
            input was a single unbatched state.
        '''
        # Integer or bool inputs would fail inside nn.Linear with a dtype
        # mismatch, so cast them to the weights' dtype.
        if not x.is_floating_point():
            x = x.to(self.head.weight.dtype)

        # BatchNorm1d rejects 1-D input, so add a batch axis for a single
        # state and strip it again on the way out.
        unbatched = x.dim() == 1
        if unbatched:
            x = x.unsqueeze(0)

        x = self.model(x)
        x = self.head(x)
        return x.squeeze(0) if unbatched else x

In [125]:
# Instantiate a 16-step network and print its layer layout.
net = DQN(n_steps=16)
print(net)

DQN(
  (model): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=64, bias=True)
    (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (head): Linear(in_features=32, out_features=16, bias=True)
)


In [107]:
# Inspired by RaveForce: https://github.com/chaosprint/RaveForce/blob/master/Python/raveforce.py
# RaveForce also contains many more action spaces.

# TODO: inherit from gym.Space
class SequenceActions:
    '''Action space whose actions are binary arrays of length ``n_steps``.'''

    def __init__(self, n_steps):
        '''Remember how many steps every sampled action must contain.'''
        self.n_steps = n_steps

    def sample(self):
        '''Draw a random action: ``n_steps`` independent picks from {0, 1}.'''
        on_off = [0, 1]
        length = self.n_steps
        return np.random.choice(on_off, size=(length,))
    

In [110]:
class MidiEnv(gym.Env):
    '''Toy drum-sequencer environment.

    The kick pattern is fixed (one hit on the first beat of every bar). An
    action is a complete binary snare pattern of the same length. The
    observation returned by ``step`` and ``reset`` is always the kick pattern,
    and an episode never terminates on its own (``done`` is always ``False``).
    '''

    def __init__(self, n_bars = 4, beats_per_bar = 4):
        '''Initialises a np array of fixed length for kick and snare

        Args:
            n_bars: number of bars in the sequence (must be >= 1).
            beats_per_bar: number of steps per bar (must be >= 1).

        Raises:
            ValueError: if ``n_bars`` or ``beats_per_bar`` is smaller than 1.
        '''
        if n_bars < 1 or beats_per_bar < 1:
            raise ValueError("n_bars and beats_per_bar must both be >= 1")

        self.n_bars = n_bars
        self.beats_per_bar = beats_per_bar

        # One kick on the first beat of each bar. The bar is built from
        # beats_per_bar so the kick sequence always has get_n_steps() entries
        # (a literal [1, 0, 0, 0] only matched beats_per_bar == 4).
        bar_pattern = [1] + [0] * (self.beats_per_bar - 1)
        self.kick_seq = np.tile(bar_pattern, self.n_bars)
        # Alternative idea: a random kick pattern, e.g.
        # np.random.choice([0, 1], size=(self.get_n_steps(),), p=[3./4, 1./4])
        self.snare_seq = np.zeros(self.get_n_steps(), dtype = np.int8)

        self.action_space = SequenceActions(n_steps = self.get_n_steps())
        self.reward_range = (0.0, 1.0)

        # Observations are binary arrays of length n_steps.
        self.observation_space = gym.spaces.MultiBinary(self.get_n_steps())

    def get_n_steps(self):
        '''Return the total number of steps: n_bars * beats_per_bar.'''
        return self.n_bars * self.beats_per_bar

    def __len__(self):
        '''Length of the environment is its number of steps.'''
        return self.get_n_steps()

    def reset(self):
        '''Clear the stored snare pattern and return the kick pattern as the observation.'''
        self.snare_seq = np.zeros(self.get_n_steps(), dtype = np.int8)
        return self.kick_seq

    def step(self, action : np.ndarray):
        '''Score a snare pattern against the kick pattern and store it.

        Args:
            action: binary array with the same shape as the kick sequence.

        Returns:
            Tuple ``(observation, reward, done, info)`` where the observation
            is the kick sequence, ``done`` is always ``False`` and ``info`` is
            an empty dict.
        '''
        action = np.asarray(action)
        reward = self.calculate_reward(self.kick_seq, action)
        # Keep a private copy so later in-place edits by the caller do not
        # change what render() shows.
        self.snare_seq = action.copy()
        return self.kick_seq, reward, False, {}

    def calculate_reward(self, kick_seq : np.ndarray, snare_seq: np.ndarray):
        '''Return ``|sum(kick_seq - snare_seq)| / n_steps``.

        NOTE(review): the differences are summed with their sign before the
        absolute value is taken, so mismatches in opposite directions cancel
        (e.g. kick [1, 0] vs snare [0, 1] scores 0, the same as identical
        patterns). Confirm this is the intended metric.
        '''
        assert kick_seq.shape == snare_seq.shape
        diff = np.sum(kick_seq - snare_seq)
        return np.abs(diff/self.get_n_steps()) # normalized sum to fit in the reward range

    def render(self, mode = "human"):
        '''Print the stored snare and kick sequences.

        ``mode`` is accepted for compatibility with the ``gym.Env.render``
        signature and is otherwise ignored.
        '''
        print("Snare sequence", self.snare_seq)
        print("Kick sequence", self.kick_seq)
        return

In [111]:
# Random rollout: sample a few snare patterns and show those that score above
# the threshold.
env = MidiEnv(n_bars = 8)
threshold = 0.2  # only rollouts with a reward above this are printed
n_trials = 2
for trial in range(n_trials):
    # Use explicit names rather than `_`: the original reused `_` as both the
    # loop variable and an unpack target, and assigning to `_` also shadows
    # IPython's "last output" alias.
    obs, reward, done, info = env.step(env.action_space.sample())
    if reward > threshold:
        env.render()
        print("Reward", reward)
env.close()

Snare sequence [1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 1 0 0 1 1 1 1 0 0 1 0 1 0 1 1 0 0]
Kick sequence [1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0]
Reward 0.3125
Snare sequence [1 0 0 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 0 1 0 1 1]
Kick sequence [1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0]
Reward 0.4375


# Default gym environment render

In [None]:
import gym

# Render a random policy on MountainCar for 1000 steps.
env = gym.make('MountainCar-v0')
env.reset()
try:
    for step_idx in range(1000):
        env.render()
        step_result = env.step(env.action_space.sample()) # take a random action
        # Older gym returns (obs, reward, done, info); newer versions return
        # (obs, reward, terminated, truncated, info).
        if len(step_result) == 5:
            episode_over = step_result[2] or step_result[3]
        else:
            episode_over = step_result[2]
        # Stepping an environment after its episode has ended is not
        # supported, so start a new episode instead.
        if episode_over:
            env.reset()
finally:
    # Always close the render window, even if a step raises.
    env.close()

In [None]:
# Scratch check of the reward formula |sum(y - x)| / 16 on random binary patterns.
for trial in range(10):
    x = np.random.choice([0, 1], size=(16,))
    y = np.random.choice([0, 1], size=(16,))

    signed_total = (y - x).sum()
    print(abs(signed_total) / 16.0)

# Extreme case: all-zeros against all-ones gives the maximum value.
zero = np.zeros(16)
sixteen = np.ones(16, dtype=int)
extreme_total = (sixteen - zero).sum()
print(abs(extreme_total) / 16.0)