In [1]:
from environment import Environment
from magiccube import Cube
from agents import A2C, DQN, PPO
import yaml
import numpy as np
import itertools
import torch
import torch.nn as nn

In [2]:
# Load the experiment configuration. The context manager closes the file
# handle as soon as parsing is done instead of leaving it open for the
# lifetime of the kernel.
with open("config.yaml", "r") as config_file:
    args = yaml.safe_load(config_file)

# Phase name handed to both the environment and the agent.
phase = "f2l"

# The environment receives only the "environment" section of the config.
env = Environment(
    phase=phase,
    args=args["environment"]
)

# The agent receives the environment, the phase name and the full config.
agent = DQN(env, phase, args)

(22,)


In [3]:
# Create a standalone Cube instance bound to `cube`. Note that the object
# printed below is the environment's own cube (`env.cube`), not this one;
# `cube` is used by the next cell to generate a move sequence.
cube = Cube()
print(env.cube)

          G  R  R                   
          O  W  G                   
          G  G  Y                   
 O  B  Y  R  O  G  O  R  B  W  B  W 
 O  O  W  G  G  R  W  R  B  W  B  W 
 Y  O  O  Y  G  R  G  R  O  B  B  B 
          B  Y  W                   
          Y  Y  Y                   
          R  Y  W                   



In [4]:
# Rotate the environment's cube with the move sequence returned by the
# standalone `cube.generate_random_moves()`, then print the environment's
# cube. This mutates `env.cube` in place, so re-running the cell changes it again.
env.cube.rotate(cube.generate_random_moves())
print(env.cube)

          O  R  R                   
          O  W  G                   
          B  W  R                   
 G  B  Y  O  B  W  G  Y  G  Y  W  Y 
 B  O  R  Y  G  W  O  R  W  G  B  Y 
 Y  Y  B  W  R  B  R  O  W  G  G  B 
          O  B  W                   
          O  Y  G                   
          R  R  O                   



In [5]:
# Scramble through the environment, then inspect the encoded state
# (`env.state2`) and what the agent's policy network returns for it.
env.scramble()
print(env.state2)

policy_out = agent.policy_net(agent.state_to_tensor(env.state2))
print(policy_out)

[[[ 3  1  5  0  2]
  [ 1  3 -1  1  1]
  [ 0  3  4  2 17]
  [ 1 -1  2  3  7]
  [ 1 -1 -1  4  4]
  [ 1 -1  4  5  3]
  [ 0  2  5  6 25]
  [ 4  2 -1  7 14]
  [ 4  1  3  8  0]
  [-1  3  4  9  9]
  [-1  3 -1 10 10]
  [-1  3  5 11 11]
  [-1 -1  4 12 12]
  [-1 -1  5 13 13]
  [-1  5  0 14 22]
  [-1  2 -1 15 15]
  [-1  5  1 16  5]
  [ 4  2  0 17 23]
  [ 0  3 -1 18 18]
  [ 4  2  1 19  6]
  [ 0 -1  2 20 24]
  [ 0 -1 -1 21 21]
  [ 5 -1  2 22 16]
  [ 5  1  2 23  8]
  [ 0  4 -1 24 20]
  [ 0  5  3 25 19]]]
tensor([[-0.1773,  0.5022, -0.1550, -0.3388, -0.0328,  0.0403, -0.0099,  0.2207,
         -0.0181,  0.0616, -0.0954, -0.0290,  0.0078,  0.0048,  0.0152,  0.0463,
          0.1078,  0.1360,  0.3220, -0.1768,  0.2346,  0.1436]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


In [6]:
# Call env.scramble() again and print the environment's cube afterwards.
env.scramble()
print(env.cube)

          G  G  R                   
          G  W  B                   
          W  G  Y                   
 Y  R  O  G  O  B  R  R  W  G  W  O 
 O  O  R  W  G  B  W  R  O  W  B  B 
 B  O  Y  G  G  B  W  R  R  W  B  Y 
          R  Y  O                   
          Y  Y  Y                   
          O  Y  B                   



In [7]:
# Building blocks of a prototype attention network over the cube state.
# The sizes are named here so they can be tuned in one place instead of
# being scattered through the layer definitions as bare numbers.
hidden_size = 128     # width of every embedded row
piece_features = 5    # values per row of env.state2 (see the state printed above)
num_pieces = 26       # rows in env.state2 (see the state printed above)
num_heads = 4         # attention heads; must divide hidden_size
num_outputs = 12      # size of the final output vector

# Projects each row of the state to a hidden_size-dimensional vector.
embedding = nn.Linear(piece_features, hidden_size)

# Multi-head attention over the rows; batch_first=True means inputs are
# laid out as (batch, sequence, feature).
attention = nn.MultiheadAttention(
    embed_dim=hidden_size,
    num_heads=num_heads,
    batch_first=True
)

# Normalisation over the hidden dimension.
layer_norm = nn.LayerNorm(hidden_size)

# Position-wise feed-forward sub-layer (single linear layer + ReLU).
ffn = nn.Sequential(
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(inplace=True)
)

# MLP head: consumes the flattened encoder output (one hidden vector per
# row, hence hidden_size * num_pieces inputs) and narrows it step by step
# down to num_outputs values. The last layer has no activation.
output_layer = nn.Sequential(
    nn.Linear(hidden_size * num_pieces, hidden_size * 12),
    nn.ReLU(inplace=True),
    nn.Linear(hidden_size * 12, hidden_size * 6),
    nn.ReLU(inplace=True),
    nn.Linear(hidden_size * 6, hidden_size * 3),
    nn.ReLU(inplace=True),
    nn.Linear(hidden_size * 3, hidden_size),
    nn.ReLU(inplace=True),
    nn.Linear(hidden_size, hidden_size // 2),
    nn.ReLU(inplace=True),
    nn.Linear(hidden_size // 2, num_outputs)
)


In [8]:
# Manual forward pass of the prototype network on the environment's
# current encoded state.

# env.state2 is a NumPy array; cast to float so it matches the layers'
# floating-point parameters.
state_tensor = torch.from_numpy(env.state2).float()
embedded = embedding(state_tensor)

# Self-attention: query, key and value are all the embedded state.
attn_output, _ = attention(embedded, embedded, embedded)
attn_output = layer_norm(embedded + attn_output)  # residual connection + norm

ffn_output = ffn(attn_output)
# NOTE(review): the same layer_norm module (shared parameters) is applied
# after both sub-layers — confirm this sharing is intended.
encoded = layer_norm(attn_output + ffn_output)  # residual connection + norm

# Keep the leading batch dimension and flatten everything after it.
# Unlike view(1, -1), this does not fold several samples into one row,
# so the same code also works for a batch larger than one.
flattened = encoded.flatten(start_dim=1)
output = output_layer(flattened)

output

tensor([[ 0.0269, -0.0699,  0.0523, -0.0387, -0.0332,  0.0554,  0.0810, -0.1355,
          0.1129, -0.0106, -0.0498, -0.1004]], grad_fn=<AddmmBackward0>)

In [9]:
# Two stacked linear layers: 3 input features -> 256 hidden units -> 2 output features
fc1 = nn.Linear(3, 256)
fc2 = nn.Linear(256, 2)

# Example input: a batch of 4 samples with 3 features each,
# holding the values 1.0 .. 12.0 row by row
input_matrix = torch.arange(1.0, 13.0).reshape(4, 3)  # shape: (4, 3)

# Run the batch through the first layer and show the hidden activations,
# then feed those through the second layer
hidden = fc1(input_matrix)
print(hidden)
output = fc2(hidden)

print("Output:", output)

tensor([[ 1.5589,  0.2432, -0.2406,  ..., -0.7252,  0.1765,  0.4300],
        [ 2.1287,  1.6550,  0.6083,  ..., -0.7340, -0.0186,  0.3546],
        [ 2.6986,  3.0669,  1.4573,  ..., -0.7429, -0.2137,  0.2791],
        [ 3.2684,  4.4787,  2.3062,  ..., -0.7518, -0.4087,  0.2037]],
       grad_fn=<AddmmBackward0>)
Output: tensor([[-0.8749,  0.7572],
        [-1.9000,  1.9852],
        [-2.9250,  3.2132],
        [-3.9501,  4.4413]], grad_fn=<AddmmBackward0>)
