In [1]:
from environment import Environment
from magiccube import Cube
from agents import A2C, DQN, PPO
import yaml
import numpy as np
import itertools
import torch
import torch.nn as nn

In [2]:
# Load the experiment configuration. The context manager guarantees the file
# handle is closed as soon as parsing finishes (the bare open() call leaked it).
with open("config.yaml", "r") as config_file:
    args = yaml.safe_load(config_file)

# Phase key: passed to the Environment and used to select the matching
# sub-section of the DQN configuration.
phase = "f2l"

# Environment configured from the "environment" section of the config file.
env = Environment(
    phase=phase,
    args=args["environment"]
)

# DQN agent bound to that environment, using the phase-specific DQN settings.
agent = DQN(env, args["DQN"][phase])

In [3]:
# Stand-alone Cube instance; the next cell calls its generate_random_moves().
cube = Cube()
# Note: this prints the environment's own cube (env.cube), not the `cube`
# object created on the line above.
print(env.cube)

          G  R  B                   
          B  W  W                   
          B  G  R                   
 W  W  Y  O  W  G  Y  R  O  W  B  R 
 W  O  B  O  G  R  G  R  O  G  B  O 
 O  O  B  R  G  G  O  R  R  B  B  Y 
          W  Y  W                   
          Y  Y  Y                   
          G  Y  Y                   



In [4]:
# Ask the stand-alone `cube` for a move sequence (presumably random, going by
# the method name) and apply it to the environment's cube.
# NOTE(review): the return value of rotate() is discarded and the printout
# changes afterwards, so env.cube appears to be modified in place; re-running
# this cell will keep changing the state.
env.cube.rotate(cube.generate_random_moves())
print(env.cube)

          G  R  B                   
          G  W  O                   
          R  Y  R                   
 R  W  Y  B  B  W  G  W  W  R  B  Y 
 B  O  B  W  G  Y  O  R  R  G  B  O 
 G  O  O  B  R  Y  B  Y  G  O  W  O 
          W  Y  O                   
          G  Y  G                   
          Y  R  W                   



In [5]:
# Scramble the environment, then inspect its encoded state and what the DQN
# policy network returns for that state.
env.scramble()
# Disabled debugging helper: take a single step with the move "B'".
#obs, reward, done = env.step("B'")
# Encoded state; in the recorded output it is an integer array printed as
# 1 x 26 x 5.
print(env.state2)
# Further disabled debugging helpers (shape, solver status, cube layout).
#print(env.state2.shape)
#print(env.algorithm.status())
#print(env.cube)
# Convert the state with the agent's own helper and run it through the policy
# network. The recorded output is a 1 x 21 tensor on cuda:0 that still carries
# a grad_fn, i.e. the forward pass is tracked by autograd.
print(agent.policy_net(agent.state_to_tensor(env.state2)))

[[[ 1  4  2  0  6]
  [ 1  3 -1  1  1]
  [ 3  5  1  2  2]
  [ 4 -1  1  3  3]
  [ 1 -1 -1  4  4]
  [ 5 -1  0  5 22]
  [ 2  0  4  6 23]
  [ 4  0 -1  7 20]
  [ 3  0  4  8 17]
  [-1  3  4  9  9]
  [-1  3 -1 10 10]
  [-1  3  5 11 11]
  [-1 -1  4 12 12]
  [-1 -1  5 13 13]
  [-1  1  5 14  5]
  [-1  2 -1 15 15]
  [-1  4  2 16 14]
  [ 1  5  2 17  8]
  [ 0  3 -1 18 18]
  [ 1  3  4 19  0]
  [ 2 -1  0 20 24]
  [ 0 -1 -1 21 21]
  [ 2 -1  5 22 16]
  [ 0  5  2 23 25]
  [ 2  1 -1 24  7]
  [ 3  0  5 25 19]]]
tensor([[ 0.0049, -0.1664, -0.0466, -0.0674,  0.0008,  0.0369, -0.3669,  0.1426,
         -0.0324,  0.4433, -0.1873, -0.0788,  0.3100,  0.7080, -0.0664,  0.0901,
         -0.3253,  0.5908, -0.0874,  0.0139,  0.0235]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


In [115]:
# Scramble the environment again and print the resulting cube layout.
# NOTE(review): this cell's execution counter (115) is out of sequence with the
# surrounding cells, so it was run out of order; confirm the notebook still
# reproduces with Restart Kernel -> Run All.
env.scramble()
print(env.cube)

          O  W  R                   
          R  W  O                   
          B  B  R                   
 G  B  O  W  W  G  Y  W  W  G  R  Y 
 G  O  W  G  G  G  R  R  B  O  B  O 
 W  O  W  G  G  B  Y  R  Y  B  B  R 
          O  Y  R                   
          Y  Y  Y                   
          B  Y  O                   



In [7]:
# Building blocks of an experimental attention-based network. The modules are
# only constructed here; the following cell wires them together by hand.

# Hyper-parameters (previously inlined as magic numbers).
hidden_size = 128   # embedding / model width
num_features = 5    # values per state row (env.state2 prints 5 columns)
num_tokens = 26     # rows per state (env.state2 prints 26 rows)
num_heads = 4       # attention heads; hidden_size must be divisible by this
# NOTE(review): the DQN policy_net output recorded earlier in this notebook has
# 21 values, while this head emits 12 -- confirm which size is intended.
num_outputs = 12

# Per-row projection from the raw feature values to the model width.
embedding = nn.Linear(num_features, hidden_size)

# Self-attention over the rows; batch_first=True means inputs are laid out as
# (batch, sequence, feature).
attention = nn.MultiheadAttention(
    embed_dim=hidden_size,
    num_heads=num_heads,
    batch_first=True
)

layer_norm = nn.LayerNorm(hidden_size)

# Position-wise feed-forward sub-layer (single linear layer + ReLU).
ffn = nn.Sequential(
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(inplace=True)
)

# MLP head applied to the flattened sequence: every hidden width is followed by
# a ReLU, and the final projection to num_outputs has no activation.
head_widths = [
    hidden_size * num_tokens,
    hidden_size * 12,
    hidden_size * 6,
    hidden_size * 3,
    hidden_size,
    hidden_size // 2,
]
head_layers = []
for in_width, out_width in zip(head_widths, head_widths[1:]):
    head_layers.append(nn.Linear(in_width, out_width))
    head_layers.append(nn.ReLU(inplace=True))
head_layers.append(nn.Linear(head_widths[-1], num_outputs))

output_layer = nn.Sequential(*head_layers)


In [8]:
# Manual forward pass: embedding -> self-attention -> feed-forward -> MLP head,
# with a residual connection and layer normalisation around each sub-layer.

# env.state2 is a NumPy array; cast to float so it matches the Linear weights.
state_tensor = torch.from_numpy(env.state2).float()
embedded = embedding(state_tensor)

# Self-attention: queries, keys and values are all the embedded state. The
# second return value (attention weights) is not needed here.
attn_output, _ = attention(embedded, embedded, embedded)
# NOTE(review): the same layer_norm instance is applied after both sub-layers,
# so they share one set of affine parameters -- confirm this is intended.
attn_output = layer_norm(embedded + attn_output)
ffn_output = ffn(attn_output)
output = layer_norm(attn_output + ffn_output)

# Collapse each sample to one row of exactly the width the head expects.
# Unlike view(1, -1) this does not hard-code a batch size of 1 and does not
# require contiguous memory; for the single-state input it is identical.
flattened = output.reshape(-1, output_layer[0].in_features)
output = output_layer(flattened)

output

tensor([[-0.0173,  0.0415, -0.0973, -0.0938,  0.0709,  0.0381,  0.0807, -0.1315,
          0.0553, -0.0070, -0.0303,  0.0105]], grad_fn=<AddmmBackward0>)

In [9]:
# Toy example: pass a small batch through two stacked linear layers
# (3 input features -> 256 hidden features -> 2 output features).
fc1 = nn.Linear(3, 256)
fc2 = nn.Linear(256, 2)

# Example input: a batch of 4 samples with 3 features each, holding the values
# 1.0 .. 12.0 row by row.
input_matrix = torch.arange(1.0, 13.0).reshape(4, 3)  # shape: (4, 3)

# First layer: print the 256-wide intermediate result.
hidden_activations = fc1(input_matrix)
print(hidden_activations)

# Second layer: project down to the 2 output features and print them.
output = fc2(hidden_activations)
print("Output:", output)

tensor([[2.0591e+00, 3.2830e+00, 8.9323e-01,  ..., 2.3303e+00, 1.0738e-02,
         2.6702e+00],
        [3.4345e+00, 7.7344e+00, 1.8468e+00,  ..., 4.5232e+00, 1.3383e+00,
         6.6655e+00],
        [4.8100e+00, 1.2186e+01, 2.8004e+00,  ..., 6.7161e+00, 2.6659e+00,
         1.0661e+01],
        [6.1855e+00, 1.6637e+01, 3.7540e+00,  ..., 8.9091e+00, 3.9935e+00,
         1.4656e+01]], grad_fn=<AddmmBackward0>)
Output: tensor([[-0.2472, -0.4388],
        [-0.2734, -1.8290],
        [-0.2997, -3.2192],
        [-0.3259, -4.6094]], grad_fn=<AddmmBackward0>)
