### Deep Learning

In [1]:
import torch
from torch import nn

In [None]:
# Produces a copy of `encoding` with -1 written wherever `mask` is False.
# NOTE(review): masked_fill is the out-of-place variant and the result is not
# assigned, so `encoding` itself stays unchanged. `encoding` and `mask` are
# not defined in the visible cells.
encoding.masked_fill(mask == False, -1)

In [None]:
# Layer normalisation sized to the last dimension of `embedding`.
# NOTE(review): `embedding` is not defined in the visible cells.
layer_norm = nn.LayerNorm(embedding.shape[-1])

In [2]:
class PointWiseFeedForward(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    Args:
        d_model: Size of the input and output feature dimension.
        d_ff: Size of the hidden feature dimension between the two linears.
        dropout: Drop probability handed to ``nn.Dropout``.
    """

    def __init__(self, d_model, d_ff, dropout):
        super().__init__()
        # Kept as a single Sequential so parameter names stay
        # ``layers.0.*`` and ``layers.3.*``.
        stages = [
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        ]
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the stack; the last dimension must be ``d_model``."""
        transformed = self.layers(x)
        return transformed

In [3]:
import math

In [4]:
import torch.nn.functional as F

In [5]:
class SelfAttention(nn.Module):
    """Scaled dot-product attention: ``softmax(q @ k^T / sqrt(d_head)) @ v``.

    Args:
        d_head: Feature size of each query/key vector; only used to scale
            the raw scores.
    """

    def __init__(self, d_head):
        super().__init__()
        self.d_head = d_head

    def forward(self, q, k, v):
        """Attend from ``q`` over ``k``/``v``.

        Args:
            q: Queries of shape ``(..., q_len, d_head)``.
            k: Keys of shape ``(..., k_len, d_head)``.
            v: Values of shape ``(..., k_len, d_v)``.

        Returns:
            ``(output, attention_weights)`` with shapes
            ``(..., q_len, d_v)`` and ``(..., q_len, k_len)``; the weights
            sum to 1 along the last axis.
        """
        # Swap only the last two axes. ``permute`` needs an entry for every
        # dimension, so the former ``k.permute(3, 2)`` raised on real input.
        transposed_k = k.transpose(-2, -1)
        scores = torch.matmul(q, transposed_k)
        scaled_scores = scores / math.sqrt(self.d_head)
        attention_weights = F.softmax(scaled_scores, dim=-1)
        output = torch.matmul(attention_weights, v)

        return output, attention_weights

In [6]:
class MultiHeadAttention(nn.Module):
    """Multi-head wrapper around an attention callable.

    Inputs are projected to queries, keys and values, split into
    ``n_heads`` heads of size ``d_model // n_heads``, passed through
    ``attention``, merged back and projected to ``d_model``.

    Args:
        attention: Callable ``(q, k, v) -> (output, attention_weights)``
            operating on ``(batch, n_heads, seq_len, d_head)`` tensors.
        d_model: Feature size of the inputs and of the final projection.
        n_heads: Number of attention heads.
    """

    def __init__(self, attention, d_model, n_heads):
        super().__init__()
        self.attention = attention
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_head = d_model // n_heads

        self.to_q = nn.Linear(d_model, n_heads * self.d_head)
        self.to_k = nn.Linear(d_model, n_heads * self.d_head)
        self.to_v = nn.Linear(d_model, n_heads * self.d_head)
        self.linear = nn.Linear(n_heads * self.d_head, d_model)

    def split_heads(self, x):
        """``(batch, seq_len, n_heads * d_head)`` -> ``(batch, n_heads, seq_len, d_head)``."""
        batch_size, seq_len, _ = x.size()
        # Split the feature axis first, then move the head axis forward.
        # Viewing straight to (batch, n_heads, seq_len, d_head) would only
        # reinterpret memory and mix different positions into one head.
        x = x.reshape(batch_size, seq_len, self.n_heads, self.d_head)
        return x.transpose(1, 2)

    def concat(self, x):
        """``(batch, n_heads, seq_len, d_head)`` -> ``(batch, seq_len, n_heads * d_head)``."""
        batch_size, n_heads, seq_len, d_head = x.size()
        # Undo split_heads: heads go back next to their features before
        # flattening (reshape copes with the non-contiguous transpose).
        return x.transpose(1, 2).reshape(batch_size, seq_len, n_heads * d_head)

    def forward(self, pre_q, pre_k, pre_v):
        """Project, attend per head, merge and project back.

        Args:
            pre_q, pre_k, pre_v: Tensors of shape ``(batch, seq_len, d_model)``.

        Returns:
            ``(projection, attention_weights)`` where ``projection`` has
            shape ``(batch, q_len, d_model)`` and ``attention_weights`` is
            whatever the attention callable returned as its second value.
        """
        q, k, v = self.to_q(pre_q), self.to_k(pre_k), self.to_v(pre_v)

        q, k, v = self.split_heads(q), self.split_heads(k), self.split_heads(v)

        output, attention_weights = self.attention(q, k, v)

        # Merge the attention output (the original passed an undefined `x`).
        output = self.concat(output)

        projection = self.linear(output)
        return projection, attention_weights

In [8]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding.

    For position ``p`` and pair index ``k``::

        PE[p, 2k]     = sin(p / n ** (2k / d_model))
        PE[p, 2k + 1] = cos(p / n ** (2k / d_model))

    Args:
        n: Base of the geometric progression of wavelengths.
        d_model: Number of encoding features per position.
    """

    def __init__(self, n, d_model):
        # nn.Module must be initialised before the instance can be called.
        super().__init__()
        self.n, self.d_model = n, d_model

    def forward(self, tokens):
        """Return a ``(len(tokens), d_model)`` float tensor of encodings.

        Only the length of ``tokens`` is used; its values are ignored.
        """
        seq_len = len(tokens)

        positions = torch.arange(seq_len, dtype=torch.float32).unsqueeze(1)
        # Columns 2k and 2k + 1 share the same frequency.
        pair_index = torch.arange(self.d_model) // 2
        exponents = (2 * pair_index).to(torch.float32) / self.d_model
        denominators = torch.pow(torch.tensor(float(self.n)), exponents)
        angles = positions / denominators

        embeddings = torch.zeros(seq_len, self.d_model)
        embeddings[:, 0::2] = torch.sin(angles[:, 0::2])
        embeddings[:, 1::2] = torch.cos(angles[:, 1::2])

        return embeddings

### Python

In [1]:
from typing import TypeVar

In [2]:
# Unconstrained generic type variable for use in type annotations.
T =  TypeVar("T")

In [4]:
from dataclasses import field, dataclass

In [5]:
@dataclass
class Camera:
    """Camera described by a single integer ``resolution`` field.

    NOTE(review): the unit of ``resolution`` (pixels, megapixels, ...) is not
    shown in this notebook; confirm with the author.
    """
    resolution: int
In [6]:
import unittest

In [None]:
class MediumTestCase(unittest.TestCase):
    """Test case sketch exercising ``assertRaises`` as a context manager."""

    def test_avg(self):
        """Averaging an empty sequence must raise instead of returning a value."""
        # NOTE(review): the original cell stopped at a bare
        # `with self.assertRaises`, which is a syntax error. The exception
        # type and the inline average below are a minimal runnable
        # placeholder; swap in the real function under test once it exists.
        values = []
        with self.assertRaises(ZeroDivisionError):
            sum(values) / len(values)

In [7]:
from abc import ABC, abstractclassmethod, abstractmethod

In [8]:
class State(ABC):
    """Abstract base class for the states an order can be in.

    Every state keeps a reference to the order it belongs to so that a
    handler can switch the order to another state.

    Args:
        order: The order object this state acts on.
    """

    def __init__(self, order):
        self.order = order

    # `abstractmethod` replaces the deprecated `abstractclassmethod`, which
    # additionally turned these instance-level handlers into classmethods.
    @abstractmethod
    def receive_payment(self):
        """Handle an incoming payment while the order is in this state."""

    @abstractmethod
    def ship(self):
        """Handle a shipping request while the order is in this state."""

In [11]:
class UnpaidOrder(State):
    """State of an order that has not been paid for yet."""

    def receive_payment(self):
        """Switch the order to its ``paid_state`` and confirm the payment."""
        self.order.set_state(self.order.paid_state)
        # Spelling of the user-facing message corrected.
        print("Your payment has been accepted")

    def ship(self):
        """Refuse to ship; only prints a message and leaves the state as is."""
        print("Can't ship unpaid orders")

In [12]:
class PaidOrder(State):
    """State of an order whose payment has been received.

    NOTE(review): added because ``UnpaidOrder.receive_payment`` switches to
    ``order.paid_state``, which did not exist; the messages below are
    placeholders to be confirmed.
    """

    def receive_payment(self):
        """Reject a second payment; the state is left unchanged."""
        print("This order has already been paid")

    def ship(self):
        """Acknowledge the shipping request."""
        print("Shipping your order")


class Order:
    """Order that delegates its behaviour to the current state object.

    Attributes are documented inline: ``unpaid_state`` and ``paid_state``
    are the reusable state instances, ``state`` is the active one.
    """

    def __init__(self):
        self.unpaid_state = UnpaidOrder(order=self)
        # Needed by UnpaidOrder.receive_payment; without it the first
        # payment raised AttributeError.
        self.paid_state = PaidOrder(order=self)

        # A new order starts out unpaid.
        self.state = self.unpaid_state

    def set_state(self, state):
        """Make ``state`` the active state."""
        self.state = state

    def receive_payment(self):
        """Forward the payment to the active state."""
        return self.state.receive_payment()

    def ship(self):
        """Forward the shipping request to the active state."""
        return self.state.ship()

In [None]:
# Zero the gradient stored on `lin.bias` in place (trailing underscore =
# in-place op). NOTE(review): `lin` is not defined in the visible cells;
# presumably an nn.Linear that has already been through a backward pass.
lin.bias.grad.zero_()

In [14]:
import torch

In [13]:
from torch import nn

In [15]:
class PointWiseFeedForward(nn.Module):
    """Feed-forward block applied to the last dimension of its input.

    The stack is Linear(d_model -> d_ff), ReLU, Dropout(dropout),
    Linear(d_ff -> d_model), stored as ``self.layers``.
    """

    def __init__(self, d_model, d_ff, dropout):
        super().__init__()
        stack = nn.Sequential()
        modules = (
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )
        # Register under "0".."3", the same names Sequential assigns when
        # modules are passed positionally.
        for position, module in enumerate(modules):
            stack.add_module(str(position), module)
        self.layers = stack

    def forward(self, x):
        """Apply the stack to ``x`` and return the result."""
        return self.layers(x)

In [47]:
def dropout_layer(activations, dropout):
    """Apply inverted dropout to ``activations``.

    Each element is zeroed independently with probability ``dropout``;
    survivors are scaled by ``1 / (1 - dropout)`` so the expected value is
    preserved.

    Args:
        activations: Tensor to apply dropout to.
        dropout: Drop probability in ``[0, 1]``.

    Returns:
        ``activations`` itself when ``dropout == 0``, an all-zero tensor when
        ``dropout == 1``, otherwise the masked and rescaled tensor.

    Raises:
        AssertionError: If ``dropout`` is outside ``[0, 1]``.
    """
    assert 0 <= dropout <= 1
    # Nothing is dropped: pass the input through untouched (the original
    # returned zeros here, which is the behaviour for dropout == 1).
    if dropout == 0:
        return activations
    # Everything is dropped; also avoids dividing by 1 - dropout == 0.
    if dropout == 1:
        return torch.zeros_like(activations)

    # Uniform samples in [0, 1): an element is kept with probability
    # 1 - dropout. Normal samples (randn) would give the wrong keep rate.
    keep = torch.rand(activations.shape, device=activations.device) > dropout
    return (keep * activations) / (1 - dropout)

In [48]:
# 4x4 integer matrix holding 0..15. torch.arange with integer arguments
# already yields int64, so the trailing .long() does not change the dtype.
x = torch.arange(16).reshape((4, 4)).long()

In [49]:
x

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]])

In [50]:
# Demonstration: randn_like cannot draw normal samples into an integer
# tensor, so this call raises the RuntimeError recorded as the cell output.
torch.randn_like(x)

RuntimeError: "normal_kernel_cpu" not implemented for 'Long'

In [54]:
# Apply dropout with p=0.6 to the integer matrix; surviving entries are
# scaled by 1 / (1 - 0.6) = 2.5, the rest become 0.
dropout_layer(x, 0.6)

tensor([[ 0.0000,  2.5000,  0.0000,  7.5000],
        [ 0.0000, 12.5000, 15.0000,  0.0000],
        [ 0.0000, 22.5000,  0.0000, 27.5000],
        [ 0.0000,  0.0000,  0.0000, 37.5000]])

In [None]:
# Pseudocode: sums a loss of D(x) against label 1 and a loss of D(G(z))
# against label 0. NOTE(review): presumably the GAN discriminator objective
# (real samples vs. generated samples); `Loss`, `D`, `G` and `z` are not
# defined in the visible cells, so this cell cannot run as written.
Loss(D(x), 1) + Loss(D(G(z)), 0)

In [None]:
# NOTE(review): incomplete. torch.stack requires a sequence of tensors as
# its first argument, so this call raises a TypeError as written.
attention = torch.stack()

In [55]:
def log_softmax(x):
    softmax = x.exp() / x.exp().sum()
    return softmax.log()

In [56]:
def binary(x):
    """Logistic sigmoid ``exp(x) / (1 + exp(x))`` of a tensor.

    The original expression ``x.exp() / 1 + x.exp()`` parsed as
    ``(x.exp() / 1) + x.exp()`` because division binds tighter than
    addition. ``Tensor.sigmoid`` gives the intended value and stays finite
    for large ``|x|``.

    Args:
        x: Floating-point tensor.

    Returns:
        Tensor of the same shape with values in ``[0, 1]``.
    """
    return x.sigmoid()

In [57]:
class BatchNorm(nn.Module):
    """Scalar batch normalisation with running statistics.

    A single mean and variance are computed over all elements of the input,
    matching the scalar ``mults``/``adds`` parameters and the one-element
    running buffers. For 1-D input this equals the previous ``dim=-1``
    reduction.

    Args:
        mom: Interpolation weight for the running statistics
            (``running <- running + mom * (batch - running)``).
        eps: Constant added to the variance before the square root.
    """

    def __init__(self, mom, eps):
        super().__init__()
        self.mom, self.eps = mom, eps
        # Learnable scale and shift, shared by every element.
        self.mults = nn.Parameter(torch.ones(1))
        self.adds = nn.Parameter(torch.zeros(1))
        # Running statistics: saved with the module, not trained.
        self.register_buffer('vars', torch.ones(1))
        self.register_buffer('means', torch.zeros(1))

    def update_stats(self, x):
        """Compute batch mean/variance and fold them into the running buffers.

        Returns:
            ``(mean, var)`` of the current batch, each of shape ``(1,)``.
        """
        # Reduce over every element so the result fits the (1,) buffers;
        # a per-row reduction cannot be lerp_-ed into them in place.
        mean, var = x.mean().reshape(1), x.var().reshape(1)
        # Each buffer tracks its own statistic (they were swapped before).
        self.means.lerp_(mean, self.mom)
        self.vars.lerp_(var, self.mom)
        return mean, var

    def forward(self, x):
        """Normalise ``x``, then apply the learnable scale and shift.

        Batch statistics are used (and the running buffers updated) in
        training mode; the running buffers are used in eval mode.
        """
        if self.training:
            # Statistics are computed without autograd tracking, so they
            # act as constants in the backward pass.
            with torch.no_grad():
                mean, var = self.update_stats(x)
        else:
            mean, var = self.means, self.vars

        # normalized
        x = (x - mean) / (var + self.eps).sqrt()

        # shift
        x = self.mults * x + self.adds
        return x

### Reinforcement Learning

In [58]:
# One-step Q-learning loop. Relies on names that are not defined in the
# visible cells: N_EPISODES, GAMMA, env, model, loss_func and optimizer.
# NOTE(review): assumes `env` follows the Gymnasium API (reset -> (obs, info),
# step -> (obs, reward, terminated, truncated, info)) and that `model` maps a
# single 1-D observation to one value per action; confirm.
for episode in range(N_EPISODES):
    observation, _ = env.reset()
    # Must start True, otherwise the episode loop below never executes.
    in_progress = True

    while in_progress:
        predicted_reward = model(torch.from_numpy(observation).float())
        # Greedy action: index of the highest predicted value.
        action_idx = torch.argmax(predicted_reward, dim=-1)

        # The environment expects a plain integer, not a tensor.
        new_observation, reward, done, truncated, info = env.step(action_idx.item())

        # Bootstrapped target. Built without gradient tracking so only the
        # prediction for the current state is trained.
        with torch.no_grad():
            if done:
                # Terminal transition: there is no future value to add.
                target_reward = torch.as_tensor(reward, dtype=predicted_reward.dtype)
            else:
                predicted_next_reward = model(torch.from_numpy(new_observation).float())
                max_predicted_next_reward = torch.max(predicted_next_reward)
                target_reward = reward + GAMMA * max_predicted_next_reward

        # Compare the value predicted for the action actually taken with
        # its target.
        loss = loss_func(predicted_reward[action_idx], target_reward)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep the observation as a NumPy array; it is converted at the top
        # of the next iteration.
        observation = new_observation
        # Stop on termination as well as on truncation; the outer loop
        # resets the environment for the next episode.
        in_progress = not (done or truncated)

NameError: name 'N_EPISODES' is not defined

In [60]:
class DeepQNetwork(nn.Module):
    """Three-layer MLP mapping an observation vector to one value per action.

    Args:
        n_observations: Size of the input feature dimension.
        n_actions: Size of the output dimension.
    """

    def __init__(self, n_observations, n_actions):
        # Required before assigning sub-modules; without it nn.Module
        # raises "cannot assign module before Module.__init__() call".
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(n_observations, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, n_actions)
        )

    def forward(self, x):
        """Return the network output for ``x`` (last dim ``n_observations``)."""
        return self.layers(x)

In [61]:
def discount_reward(rewards, discount_factor):
    """Scale each reward by ``discount_factor`` raised to its index.

    Args:
        rewards: Sequence of rewards supporting ``len`` and multiplication
            with a tensor.
        discount_factor: Base of the per-step discount.

    Returns:
        Element-wise product ``discount_factor ** t * rewards[t]``.
    """
    steps = torch.arange(len(rewards))
    weights = discount_factor ** steps
    return weights * rewards

In [None]:
# Bare tuple of names; nothing is assigned or called here.
# NOTE(review): the training loop above unpacks env.step as
# (observation, reward, done, truncated, info); the order listed here
# differs, so confirm which one is intended.
state, reward, truncated, info, done