In [1]:
# pip install gym 
import gym 

In [31]:
import gym


env = gym.make('CartPole-v0') # create a env 
env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample()) # take a random action
env.close()

[2019-10-04 01:59:32,960] Making new env: CartPole-v0
  result = entry_point.load(False)
[2019-10-04 01:59:33,280] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.


In [6]:
a = 15
b = 'ag4'
print("{} {}".format(a,b))

15 ag4


If we ever want to do better than take random actions at each step, it’d probably be good to actually know what our actions are doing to the environment.

In [117]:
import gym
from gym.monitoring import VideoRecorder

env = gym.make('CartPole-v0').unwrapped
video_path = "/home/shane/ML/2_DQN_cartpole/videos/test.mp4"
video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)

for i_episode in range(50):
    
    observation = env.reset() # start a env, it will return the initial state
    for t in range(1000):
        env.render()
        video_recorder.capture_frame()
#         print(observation)
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action) 
        if done:
#             print("observation: \n{}\nreward: {}".format(observation, reward))
#             print("Episode finished after {} timesteps".format(t+1))
            break
print("Saved video.")
video_recorder.close()
video_recorder.enabled = False
env.close()

[2019-10-08 00:34:50,797] Making new env: CartPole-v0
  result = entry_point.load(False)
[2019-10-08 00:34:50,800] Starting new video recorder writing to /home/shane/ML/2_DQN_cartpole/videos/test.mp4


Saved video.


This is just an implementation of the classic “agent-environment loop”. Each timestep, the agent chooses an action, and the environment returns an observation and a reward.

![image.png](attachment:image.png)

In [37]:
env.close()

In [27]:
from gym import spaces
space = spaces.Discrete(9) # Set with 8 elements {0, 1, 2, ..., 7}
x = space.sample()
# assert space.contains(x)
# assert space.n == 8

In [15]:
import gym
env = gym.make('CartPole-v0').unwrapped
env.reset()
# screen = env.render(mode='rgb_array')
screen = env.render(mode='rgb_array').transpose((2, 0, 1))
print(env.state)


[2019-10-06 04:45:20,995] Making new env: CartPole-v0


[-0.02424558  0.04242505 -0.00979132  0.00875781]


  result = entry_point.load(False)


In [19]:
env.state

array([-0.02424558,  0.04242505, -0.00979132,  0.00875781])

In [26]:
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import torch

env = gym.make('CartPole-v0').unwrapped
resize = T.Compose([T.ToPILImage(),
                    T.Resize(40, interpolation=Image.CUBIC),
                    T.ToTensor()])


def get_cart_location(screen_width):
    world_width = env.x_threshold * 2
    print("env.x_threshold: {}".format(env.x_threshold))
    scale = screen_width / world_width
    print("env.state: {}".format(env.state))
    return int(env.state[0] * scale + screen_width / 2.0)  # MIDDLE OF CART

def get_screen():
    # Returned screen requested by gym is 400x600x3, but is sometimes larger
    # such as 800x1200x3. Transpose it into torch order (CHW).
    screen = env.render(mode='rgb_array').transpose((2, 0, 1))
    # Cart is in the lower half, so strip off the top and bottom of the screen
    _, screen_height, screen_width = screen.shape
    screen = screen[:, int(screen_height*0.4):int(screen_height * 0.8)]
    view_width = int(screen_width * 0.6)
    cart_location = get_cart_location(screen_width)
    if cart_location < view_width // 2:
        slice_range = slice(view_width)
    elif cart_location > (screen_width - view_width // 2):
        slice_range = slice(-view_width, None)
    else:
        slice_range = slice(cart_location - view_width // 2,
                            cart_location + view_width // 2)
    # Strip off the edges, so that we have a square image centered on a cart
    screen = screen[:, :, slice_range]
    print(screen)
    # Convert to float, rescale, convert to torch tensor
    # (this doesn't require a copy)
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
    screen = torch.from_numpy(screen)
    # Resize, and add a batch dimension (BCHW)
    return resize(screen).unsqueeze(0).to(device)


env.reset()
plt.figure()
plt.imshow(get_screen().cpu().squeeze(0).permute(1, 2, 0).numpy(),
           interpolation='none')
plt.title('Example extracted screen')
plt.show()

[2019-10-06 05:07:47,832] Making new env: CartPole-v0


env.x_threshold: 2.4
env.state: [ 0.03538013 -0.03979271  0.01533461 -0.03130159]
[[[255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  ...
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]]

 [[255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  ...
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]]

 [[255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  ...
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]
  [255 255 255 ... 255 255 255]]]


  result = entry_point.load(False)


ValueError: some of the strides of a given numpy array are negative. This is currently not supported, but will be added in future releases.

<matplotlib.figure.Figure at 0x7f6d72f1e978>

In [27]:
import numpy as np

s = slice(5,8)

In [30]:
a = np.arange(10)
a.flags['C_CONTIGUOUS']

True

In [37]:
a[s].flags['C_CONTIGUOUS']

True

In [40]:
s = (1,4)

In [41]:
a[s]

IndexError: too many indices for array

In [42]:
x = np.arange(6).reshape(2,3)

In [43]:
 x.flags['C_CONTIGUOUS']

True

In [44]:
import torch.nn as nn

In [45]:
layer = nn.Linear(3,5)

i = torch.Tensor([1,2,3]).unsqueeze(0)

In [47]:
list(layer.parameters())

[Parameter containing:
 tensor([[ 0.1415,  0.4131, -0.1945],
         [-0.4090,  0.5704, -0.2182],
         [ 0.2934,  0.2782, -0.1794],
         [ 0.1727, -0.0038, -0.4985],
         [ 0.3171, -0.3255, -0.3330]], requires_grad=True),
 Parameter containing:
 tensor([ 0.1029, -0.4493, -0.5293, -0.3708,  0.0293], requires_grad=True)]

In [50]:
layer(i)

tensor([[ 0.4873, -0.3721, -0.2176, -1.7014, -1.3035]],
       grad_fn=<AddmmBackward>)

In [81]:
layer(i).max(1)[1].view(1, 1)

tensor([[0]])

In [66]:
import random

In [72]:
random.random()

0.2340890655608222

In [82]:
from collections import namedtuple
profile = namedtuple('profile', ('name', 'age', 'sex'))

In [83]:
p1 = ('shane', 16, 'male')
p2 = ('logan', 21, 'male')
p3 = ('maggie', 22, 'female')
p4 = ('wendy', 18, 'female')
l = []
l.append(p1)
l.append(p2)
l.append(p3)
l.append(p4)

In [87]:
a = random.sample(l,3)
a

[('logan', 21, 'male'), ('maggie', 22, 'female'), ('wendy', 18, 'female')]

In [89]:
list(zip(*a))

[('logan', 'maggie', 'wendy'), (21, 22, 18), ('male', 'female', 'female')]

In [90]:
profile(*zip(*a))

profile(name=('logan', 'maggie', 'wendy'), age=(21, 22, 18), sex=('male', 'female', 'female'))

In [91]:
a = torch.Tensor(1)

In [92]:
a

tensor([5.5166e-38])

In [93]:
a.item()

5.516604889423425e-38

In [97]:
a = torch.Tensor([4,3,2,1])

In [102]:
probs = nn.functional.softmax(a, 0)
probs

tensor([0.6439, 0.2369, 0.0871, 0.0321])

In [108]:
probs.multinomial(1)

tensor([3])