# NLP
- huggingface下transformers库的分词工具：Tokenizer快速使用
- 安装torchtext时需要选择与PyTorch相对应的版本：
  - 若PyTorch的版本为1.a.b，则torchtext的版本为0.(a+1).b
  - 例如pytorch==1.13.1时，执行 pip install torchtext==0.14.1

In [None]:
import torch
from torch import nn
# Lookup table with 10 token ids, each mapped to a 4-dim vector: weight is [10, 4].
embed = torch.nn.Embedding(num_embeddings=10, embedding_dim=4)
# Token ids as produced by a word2id mapping; 1 stands for EOS and 2 for PAD.
# Three sentences of six tokens each: [batch_size=3, seq_len=6].
batch = [[3, 6, 5, 6, 7, 1], [6, 4, 7, 9, 5, 1], [4, 5, 8, 7, 1, 2]]
batch = torch.tensor(batch, dtype=torch.long)
# Swap the axes to [seq_len, batch_size], the layout RNN modules expect by
# default. This must be a transpose: reshape(6, 3) would only re-chunk the
# flat buffer and mix tokens from different sentences into one column.
batch = batch.transpose(0, 1).contiguous()
# Conceptually one-hot [6, 3, 10] x weight [10, 4] -> [6, 3, 4];
# every id must be smaller than num_embeddings (10).
batch_embed = embed(batch)
batch_embed.size()

# CV
- Pytorch Image Models (timm)有常用的视觉模型：Pytorch视觉模型库--timm

In [None]:
## 加载CV领域的模型结构（方法一）

# 参考链接：https://blog.csdn.net/me_yundou/article/details/109218273
import torch
import torchvision

# resnet = torchvision.models.resnet50(weights=None)  # architecture only, no pretrained parameters
# `weights=` replaces the `pretrained=` flag that torchvision deprecated in 0.13;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to load.
model = torchvision.models.vgg19(weights=torchvision.models.VGG19_Weights.IMAGENET1K_V1)
print(model)  # print the network structure

# model.classifier.add_module("add_linear", torch.nn.Linear(1000, 10))  # append one layer to the vgg19 classifier
# model.classifier[6] = torch.nn.Linear(4096, 10)  # replace a layer in place; the index must match the printed structure

# model = list(model.children())[:-1]  # drop the last child module (for vgg19 this leaves a (512, 7, 7) feature map)
# model = torch.nn.Sequential(*model)

# input = torch.randn(2, 3, 224, 224)
# output = model(input)
# output.size()

In [None]:
## Load a CV model architecture (method 2): through the timm model registry

import timm
model = timm.create_model('vgg19',pretrained=True)
# Alternative: name the checkpoint explicitly by its Hugging Face Hub id
# (presumably the same torchvision-trained VGG19 weights; verify against the timm model card).
# model = timm.create_model("hf_hub:timm/vgg19.tv_in1k", pretrained=True)

# pytorch
## 定义模型

In [None]:
import torch
from torch import nn
from torchkeras import summary

## Build a custom model by subclassing nn.Module

class Net(nn.Module):
    """Small CNN binary classifier with every layer held as its own attribute.

    Two conv/max-pool stages feed a global adaptive max-pool, followed by a
    64 -> 32 -> 1 dense head that ends in a sigmoid. A batch of 3-channel
    images (N, 3, H, W) is mapped to a (N, 1) tensor.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout2d(0.1)
        # Collapse each of the 64 channels to a single value.
        self.adaptive_pool = nn.AdaptiveMaxPool2d((1, 1))
        # Dense head.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the layers in declaration order and return the sigmoid output."""
        pipeline = (
            self.conv1, self.pool1,
            self.conv2, self.pool2,
            self.dropout, self.adaptive_pool,
            self.flatten, self.linear1, self.relu,
            self.linear2, self.sigmoid,
        )
        out = x
        for stage in pipeline:
            out = stage(out)
        return out
      
# Instantiate the model defined above and print its module tree.
net = Net()
print(net)
# torchkeras `summary` is called with a single-sample shape (3, 32, 32), i.e. without
# the batch dimension; presumably it prints per-layer output shapes and parameter
# counts -- confirm against the installed torchkeras version.
summary(net, input_shape=(3, 32, 32))

## Build a model layer by layer with nn.Sequential (no forward method needed)

# 1. Register named layers one at a time through add_module
net = nn.Sequential()
for layer_name, layer in (
    ("conv1", nn.Conv2d(3, 32, 3)),
    ("pool1", nn.MaxPool2d(2, 2)),
    ("conv2", nn.Conv2d(32, 64, 5)),
    ("pool2", nn.MaxPool2d(2, 2)),
    ("dropout", nn.Dropout2d(0.1)),
    ("adaptive_pool", nn.AdaptiveMaxPool2d((1, 1))),
    ("flatten", nn.Flatten()),
    ("linear1", nn.Linear(64, 32)),
    ("relu", nn.ReLU()),
    ("linear2", nn.Linear(32, 1)),
    ("sigmoid", nn.Sigmoid()),
):
    net.add_module(layer_name, layer)
# print(net)

# 2. Pass the layers as variadic positional arguments (they are auto-named "0", "1", ...)
layer_stack = [
    nn.Conv2d(3, 32, 3),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2, 2),
    nn.Dropout2d(0.1),
    nn.AdaptiveMaxPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 1),
    nn.Sigmoid(),
]
net1 = nn.Sequential(*layer_stack)
# print(net1)

# 3. Pass an OrderedDict that maps layer names to layers
from collections import OrderedDict

named_layers = OrderedDict()
named_layers["conv1"] = nn.Conv2d(3, 32, 3)
named_layers["pool1"] = nn.MaxPool2d(2, 2)
named_layers["conv2"] = nn.Conv2d(32, 64, 5)
named_layers["pool2"] = nn.MaxPool2d(2, 2)
named_layers["dropout"] = nn.Dropout2d(0.1)
named_layers["adaptive_pool"] = nn.AdaptiveMaxPool2d((1, 1))
named_layers["flatten"] = nn.Flatten()
named_layers["linear1"] = nn.Linear(64, 32)
named_layers["relu"] = nn.ReLU()
named_layers["linear2"] = nn.Linear(32, 1)
named_layers["sigmoid"] = nn.Sigmoid()
net2 = nn.Sequential(named_layers)
# print(net2)

## 继承nn.Module基类构建模型并辅助应用模型容器进行封装

# 1. nn.Sequential as the model container
class Net(nn.Module):
    """CNN binary classifier split into two nn.Sequential stages.

    `conv` holds the convolution/pooling feature extractor and `dense` the
    flatten + 64 -> 32 -> 1 head with a final sigmoid.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Conv2d(3, 32, 3),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.1),
            nn.AdaptiveMaxPool2d((1, 1)),
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.dense = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, then the dense head."""
        return self.dense(self.conv(x))
# net = Net()
# print(net)

# 2. nn.ModuleList as the model container
class Net(nn.Module):
    """CNN binary classifier whose layers live in one nn.ModuleList.

    A ModuleList only registers the layers; it has no forward of its own,
    so `forward` walks the list explicitly.
    """

    def __init__(self):
        super().__init__()
        ordered_layers = [
            nn.Conv2d(3, 32, 3),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.1),
            nn.AdaptiveMaxPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.layers = nn.ModuleList(ordered_layers)

    def forward(self, x):
        """Feed the input through every layer in list order."""
        out = x
        for stage in self.layers:
            out = stage(out)
        return out
# net = Net()
# print(net)

# 3. nn.ModuleDict as the model container
class Net(nn.Module):
    """CNN binary classifier whose layers are looked up by name in an nn.ModuleDict.

    The single "pool" entry is applied twice (after each convolution), so the
    execution order is kept separately from the dictionary itself.
    """

    # Names of the layers in the order forward applies them; "pool" appears twice.
    _FORWARD_ORDER = (
        "conv1", "pool", "conv2", "pool", "dropout", "adaptive",
        "flatten", "linear1", "relu", "linear2", "sigmoid",
    )

    def __init__(self):
        super().__init__()
        registry = nn.ModuleDict()
        registry["conv1"] = nn.Conv2d(3, 32, 3)
        registry["pool"] = nn.MaxPool2d(2, 2)
        registry["conv2"] = nn.Conv2d(32, 64, 5)
        registry["dropout"] = nn.Dropout2d(0.1)
        registry["adaptive"] = nn.AdaptiveMaxPool2d((1, 1))
        registry["flatten"] = nn.Flatten()
        registry["linear1"] = nn.Linear(64, 32)
        registry["relu"] = nn.ReLU()
        registry["linear2"] = nn.Linear(32, 1)
        registry["sigmoid"] = nn.Sigmoid()
        self.layers_dict = registry

    def forward(self, x):
        """Apply the named layers in the fixed execution order."""
        out = x
        for key in self._FORWARD_ORDER:
            out = self.layers_dict[key](out)
        return out
# net = Net()
# print(net)

## 梯度和求导
[一文解释 PyTorch求导相关 (backward, autograd.grad)](https://zhuanlan.zhihu.com/p/279758736)

In [None]:
import torch    

# y = (x + 1) * (x + 2), so dy/dx = 2x + 3 = 7 at x = 2.
# backward() stores the result in the leaf's .grad; the intermediate
# (non-leaf) tensors a, b and y keep no .grad of their own.
x = torch.tensor(2.0, requires_grad=True)
a = x + 1
b = x + 2
y = a * b
y.backward()
nodes = (x, a, b, y)
print("=====backward=====")
print("requires_grad: ", *(t.requires_grad for t in nodes))
print("is_leaf: ", *(t.is_leaf for t in nodes))
print("grad: ", *(t.grad for t in nodes))

# Same graph, but autograd.grad returns the gradient as a tuple and leaves
# every .grad attribute untouched.
x = torch.tensor(2.0, requires_grad=True)
a = x + 1
b = x + 2
y = a * b
grad = torch.autograd.grad(y, x)
nodes = (x, a, b, y)
print("=====autograd.grad=====")
print("requires_grad: ", *(t.requires_grad for t in nodes))
print("is_leaf: ", *(t.is_leaf for t in nodes))
print("grad: ", *(t.grad for t in nodes))
print(grad)

In [None]:
def _leaf_pair():
    """Return fresh leaf tensors x = 2 and y = 3 that both track gradients."""
    return (
        torch.tensor(2.0, requires_grad=True),
        torch.tensor(3.0, requires_grad=True),
    )


# Every demo below uses z = x^2 * y: dz/dx = 2xy = 12, dz/dy = x^2 = 4, d2z/dx2 = 2y = 6.
print("=====backward=====")
x, y = _leaf_pair()
z = x * x * y
z.backward()
print(x.grad, y.grad)

print("=====autograd.grad=====")
x, y = _leaf_pair()
z = x * x * y
grad_x = torch.autograd.grad(z, x)
print(grad_x)

print("=====保留计算图，求偏导=====")
x, y = _leaf_pair()
z = x * x * y
# retain_graph keeps the graph alive so that a second grad call can reuse it
grad_x = torch.autograd.grad(z, x, retain_graph=True)
grad_y = torch.autograd.grad(z, y)
print(grad_x, grad_y)


x, y = _leaf_pair()
z = x * x * y
print("=====autograd.grad,二阶求导=====")
# create_graph records the gradient computation itself, so it can be differentiated again
grad_x = torch.autograd.grad(z, x, create_graph=True)
grad_xx = torch.autograd.grad(grad_x, x)
print(grad_x, grad_xx)

x, y = _leaf_pair()
z = x * x * y
print("=====autograd.grad() + backward(),二阶求导=====")
grad = torch.autograd.grad(z, [x, y], create_graph=True)
# Differentiate dz/dx = 2xy once more; the results land in x.grad and y.grad
grad[0].backward()
print(x.grad, y.grad, grad[0].grad, grad[1].grad)

x, y = _leaf_pair()
z = x * x * y
print("=====backward() + autograd.grad(),二阶求导=====")
# With create_graph=True, x.grad carries a graph and can be used as an output
z.backward(create_graph=True)
grad_xx = torch.autograd.grad(x.grad, x)
print(grad_xx, x.grad)

In [None]:
# Gradient accumulation vs. zeroing when taking a second derivative with two backward() calls.
# z = x^2 * y with x = 2, y = 3, so dz/dx = 2xy = 12 and d(2xy)/dx = 2y = 6.
print("=====backward() + backward(),二阶求导=====")
x = torch.tensor(2.).requires_grad_()
y = torch.tensor(3.).requires_grad_()
z = x * x * y
z.backward(create_graph=True) # x.grad = dz/dx = 2xy = 12
x.grad.backward() # second order: d(2xy)/dx = 2y = 6 is accumulated onto the existing 12, giving 18
print(x.grad)

print("=====梯度清零,二阶求导=====")
x = torch.tensor(2.).requires_grad_()
y = torch.tensor(3.).requires_grad_()
z = x * x * y
z.backward(create_graph=True)
x.grad.data.zero_() # zero the stored first-order gradient in place before the second backward
x.grad.backward()
# only the second-order term 2y = 6 is left in x.grad
print(x.grad)

In [None]:
# Vector derivatives: backward() needs a scalar output, so a vector result is
# either reduced to a scalar first or given an explicit upstream gradient.
x = torch.tensor([1.0, 2.0], requires_grad=True)
y = x * x
# sum(y) = x1^2 + x2^2; summing leaves each partial derivative (2 * x_i) unchanged
torch.sum(y).backward()
# grad_x = torch.autograd.grad(outputs=y.sum(), inputs=x)
print(x.grad)

print("=====求导计算的雅可比矩阵=====")
x = torch.tensor([1.0, 2.0], requires_grad=True)
y = x * x
# An all-ones upstream gradient multiplies the Jacobian, which matches the sum() result above
upstream = torch.ones_like(y)
y.backward(upstream)
# grad_x = torch.autograd.grad(outputs=y, inputs=x, grad_outputs=torch.ones_like(y))
print(x.grad)

In [None]:
# Cut a branch out of the graph with detach()
x = torch.tensor([2.0], requires_grad=True)
a = (x + 1).detach()  # treated as a constant from here on
b = x + 2
y = a * b
# Full chain rule: dy/dx = dy/da * da/dx + dy/db * db/dx; the detached branch
# contributes nothing, leaving dy/db * db/dx = a = 3.
y.backward()
nodes = (x, a, b, y)
print("requires_grad: ", *(t.requires_grad for t in nodes))
print("is_leaf: ", *(t.is_leaf for t in nodes))
print("grad: ", *(t.grad for t in nodes))

# vit_pytorch
- https://github.com/lucidrains/vit-pytorch

In [None]:
import torch
from vit_pytorch import ViT

# Hyperparameters of the Vision Transformer, collected in one place.
vit_config = {
    "image_size": 256,
    "patch_size": 32,
    "num_classes": 1000,
    "dim": 1024,
    "depth": 6,
    "heads": 16,
    "mlp_dim": 2048,
    "dropout": 0.1,
    "emb_dropout": 0.1,
}
v = ViT(**vit_config)

# One random 3-channel 256x256 image as a smoke-test input.
img = torch.randn(1, 3, 256, 256)

preds = v(img)  # (1, 1000)
preds.shape

# wandb  
- https://wandb.ai/

In [None]:
import wandb
import random

# Start exactly one wandb run to track this script. The settings, project and
# config all go into a single init call: a separate earlier init would open a
# run without project/config, and the second call's arguments could then be
# ignored or leave a stray run behind.
wandb.init(
    # set the wandb project where this run will be logged
    project="my-awesome-project",

    # track hyperparameters and run metadata
    config={
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    },

    # run the wandb backend in a thread, as the original settings-only call requested
    settings=wandb.Settings(start_method="thread"),
)

try:
    # simulate training
    epochs = 10
    offset = random.random() / 5
    # epoch starts at 2, so the divisions by `epoch` below are always well-defined
    for epoch in range(2, epochs):
        acc = 1 - 2 ** -epoch - random.random() / epoch - offset
        loss = 2 ** -epoch + random.random() / epoch + offset

        # log metrics to wandb
        wandb.log({"acc": acc, "loss": loss})
finally:
    # finish the run even if the loop raises; necessary in notebooks
    wandb.finish()

# AutoGluon 
- https://auto.gluon.ai/stable/index.html
- Tabular
- Multimodal
- Time Series


In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor

# Directory holding the train/test CSV files (the trailing slash is part of the prefix).
data_root = '../data/autogluon/'
train_data = TabularDataset(f"{data_root}train.csv")
test_data = TabularDataset(f"{data_root}test.csv")

# Train on the 'class' column, then predict labels for the test set.
# Optional fit arguments to try: hyperparameters='multimodal', num_stack_levels=1, num_bagging_folds=5
predictor = TabularPredictor(label='class')
predictor = predictor.fit(train_data=train_data)
predictions = predictor.predict(data=test_data)