In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from d2l import torch as d2l
import math
import random
import time
from torch.optim.optimizer import Optimizer
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from collections import deque
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.autograd import Function
from torch.autograd import Variable as V
# Module-level default compute device, obtained from d2l.try_gpu()
# (presumably a GPU when one is available, otherwise the CPU -- confirm against the d2l docs).
# NOTE: the training cell further down rebinds `device` with torch.device(...)
# before the model is built, so that later value is the one used for training.
device = d2l.try_gpu()

In [77]:
#vfosgd_pf
class VFOSGD_PF(Optimizer):
    """Plain SGD whose gradient is rescaled by a fractional-order factor of the parameter.

    For every parameter element ``x`` with gradient ``grad`` the update direction is

    * ``x > 0``:  ``g = grad * (x ** (1 - r) / Gamma(2 - r) + eps)``
    * ``x <= 0``: ``g = grad * (eps ** (1 - r) / Gamma(2 - r) + 1)``

    with ``eps = 1e-8``, and the parameter is updated as ``x <- x - lr * g``.

    Args:
        params: iterable of parameters or dicts defining parameter groups.
        lr: learning rate (must be >= 0).
        r: fractional order (must be >= -2).

    Raises:
        ValueError: if ``lr`` or ``r`` is outside the accepted range.
    """

    def __init__(self, params, lr=1e-1,r=0.9):  # r is the fractional order
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))

        if not -2.0 <= r:
            raise ValueError("Invalid r value: {}".format(r))

        defaults = dict(lr=lr,r=r)
        super(VFOSGD_PF, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(VFOSGD_PF, self).__setstate__(state)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
            ValueError: from ``math.gamma`` when ``2 - r`` is zero or a negative integer.
        """
        eps = 1e-8
        loss = None
        if closure is not None:
            # The closure usually calls backward(), so it needs autograd enabled
            # even though the update itself runs under no_grad.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            r = group['r']
            step_size = group['lr']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('VFOSGD_PF does not support sparse gradients')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0

                state['step'] += 1

                # Constants are created next to the parameter (same device and dtype),
                # so the optimizer does not depend on any module-level `device`.
                eps_like = torch.full_like(p, eps)

                # Base of the fractional power: the parameter itself where positive, eps elsewhere.
                power_base = torch.where(p <= 0, eps_like, p)
                # Additive scale: eps where the parameter is positive, 1 elsewhere.
                linear_scale = torch.where(p > 0, eps_like, torch.ones_like(p))

                fractional_term = grad * power_base ** (1 - r) / math.gamma(2 - r)
                linear_term = grad * linear_scale

                g = fractional_term + linear_term

                # Keyword `alpha` replaces the deprecated add_(scalar, tensor) overload.
                p.add_(g, alpha=-step_size)

        return loss
    

#vfoAdam_pf
class VFOAdam_PF(Optimizer):
    """Adam-style optimizer driven by a fractional-order rescaled gradient.

    For every parameter element ``x`` with gradient ``grad`` the rescaled gradient is

    * ``x > 0``:  ``g = grad * (x ** (1 - r) / Gamma(2 - r) + eps)``
    * ``x <= 0``: ``g = grad * (eps ** (1 - r) / Gamma(2 - r) + 1)``

    ``g`` then feeds exponential moving averages ``m`` (of ``g``) and ``v`` (of ``g ** 2``),
    both bias-corrected by ``1 - beta ** step``, and the parameter is updated as
    ``x <- x - lr * m_hat / (sqrt(v_hat) + eps)``. The same ``eps`` is used in the
    rescaling and in the denominator.

    Args:
        params: iterable of parameters or dicts defining parameter groups.
        lr: learning rate (must be >= 0).
        betas: decay coefficients for ``m`` and ``v`` (each in ``[0, 1)``).
        eps: small positive constant (must be >= 0).
        r: fractional order (must be >= -2).

    Raises:
        ValueError: if any hyper-parameter is outside the accepted range.
    """

    def __init__(self, params, lr=1e-1, betas=(0.9, 0.999), eps=1e-8,r=0.9):  # r is the fractional order
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))

        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))

        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))

        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))

        if not -2.0 <= r:
            raise ValueError("Invalid r value: {}".format(r))

        defaults = dict(lr=lr, betas=betas, eps=eps,r=r)
        super(VFOAdam_PF, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(VFOAdam_PF, self).__setstate__(state)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
            ValueError: from ``math.gamma`` when ``2 - r`` is zero or a negative integer.
        """
        loss = None
        if closure is not None:
            # The closure usually calls backward(), so it needs autograd enabled
            # even though the update itself runs under no_grad.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            r = group['r']
            step_size = group['lr']
            eps = group['eps']
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('VFOAdam_PF does not support sparse gradients')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # First and second moment buffers, allocated like the parameter
                    # so they always share its shape, dtype and device.
                    state['m'] = torch.zeros_like(p)
                    state['v'] = torch.zeros_like(p)

                state['step'] += 1

                # Constants are created next to the parameter (same device and dtype),
                # so the optimizer does not depend on any module-level `device`.
                eps_like = torch.full_like(p, eps)

                # Base of the fractional power: the parameter itself where positive, eps elsewhere.
                power_base = torch.where(p <= 0, eps_like, p)
                # Additive scale: eps where the parameter is positive, 1 elsewhere.
                linear_scale = torch.where(p > 0, eps_like, torch.ones_like(p))

                fractional_term = grad * power_base ** (1 - r) / math.gamma(2 - r)
                linear_term = grad * linear_scale

                g = fractional_term + linear_term

                # Exponential moving averages of g and g**2.
                state['m'] = beta1 * state['m'] + (1 - beta1) * g
                state['v'] = beta2 * state['v'] + (1 - beta2) * g ** 2

                # Bias correction for the zero-initialised averages.
                m_hat = state['m'] / (1 - beta1 ** state['step'])
                v_hat = state['v'] / (1 - beta2 ** state['step'])

                # Keyword `alpha` replaces the deprecated add_(scalar, tensor) overload.
                p.add_(m_hat / (torch.sqrt(v_hat) + eps), alpha=-step_size)

        return loss
    

In [4]:
# Load the Fashion-MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits are stored under ./data and downloaded when missing.
trainset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform,
)
testset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform,
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [5]:
# Define the model
class Net(nn.Module):
    """Small convolutional classifier.

    Two convolution + max-pool stages are followed by three linear layers.
    ``forward`` reshapes the second pooling output to rows of ``16 * 4 * 4``
    features and returns 10 values per row.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the output of the last linear layer."""
        relu = nn.functional.relu

        # Convolutional feature extractor (the same pooling layer is reused).
        features = self.pool(relu(self.conv1(x)))
        features = self.pool(relu(self.conv2(features)))

        # Flatten to (-1, 256) before the fully connected head.
        flat = features.view(-1, 16 * 4 * 4)

        hidden = relu(self.fc1(flat))
        hidden = relu(self.fc2(hidden))
        return self.fc3(hidden)

In [82]:
r = 0.2

# Per-epoch history of this run. The fully connected layers are trained with
# VFOAdam_PF and the export cell reads VFOAdam_PF_02_Accuracy, so the run is
# recorded under the VFOAdam_PF_02_* names. They are (re)created here so the
# cell works on a fresh kernel and a re-run does not append to stale data.
VFOAdam_PF_02_Loss = []
VFOAdam_PF_02_Accuracy = []
# Earlier revisions of this cell stored the loss under IFOAdam_PF_02_Loss;
# keep the old names bound to the same lists so code reading them still works.
IFOAdam_PF_02_Loss = VFOAdam_PF_02_Loss
IFOAdam_PF_02_Accuracy = VFOAdam_PF_02_Accuracy

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = Net()
net.to(device)

criterion = nn.CrossEntropyLoss()

# Convolutional part: SGD with momentum.
# nn.MaxPool2d has no learnable parameters, so the middle group is empty; it is
# kept only so optimizer1.param_groups keeps its original layout.
optimizer1 = torch.optim.SGD([{'params': net.conv1.parameters()},
                              {'params': net.pool.parameters()},
                              {'params': net.conv2.parameters()}], lr=0.001, momentum=0.9)

# Fully connected part: fractional-order optimizer.
# To run the SGD variant instead, build optimizer2 with VFOSGD_PF using the same
# parameter groups and the same r / lr keyword arguments.
optimizer2 = VFOAdam_PF([{'params': net.fc1.parameters()},
                       {'params': net.fc2.parameters()},
                       {'params': net.fc3.parameters()}],r=r,lr=0.001)

# training
for epoch in range(150):
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Clears the gradients of every parameter, i.e. for both optimizers at once.
        net.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer1.step()
        optimizer2.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(trainloader)
    print('[%d] loss: %.3f' % (epoch + 1, epoch_loss))
    VFOAdam_PF_02_Loss.append(epoch_loss)

    # estimate
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            predicted = net(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the test images: %.2f %%' % (
        100 * correct / total))
    VFOAdam_PF_02_Accuracy.append(correct / total)

[1] loss: 0.711
Accuracy of the network on the test images: 80.24 %
[2] loss: 0.447
Accuracy of the network on the test images: 83.70 %
[3] loss: 0.387
Accuracy of the network on the test images: 85.00 %
[4] loss: 0.351
Accuracy of the network on the test images: 86.42 %
[5] loss: 0.326
Accuracy of the network on the test images: 87.14 %
[6] loss: 0.307
Accuracy of the network on the test images: 87.05 %
[7] loss: 0.294
Accuracy of the network on the test images: 87.98 %
[8] loss: 0.281
Accuracy of the network on the test images: 88.19 %
[9] loss: 0.269
Accuracy of the network on the test images: 88.56 %
[10] loss: 0.258
Accuracy of the network on the test images: 88.15 %
[11] loss: 0.252
Accuracy of the network on the test images: 88.59 %
[12] loss: 0.242
Accuracy of the network on the test images: 89.11 %
[13] loss: 0.235
Accuracy of the network on the test images: 88.88 %
[14] loss: 0.227
Accuracy of the network on the test images: 88.77 %
[15] loss: 0.219
Accuracy of the network on

[120] loss: 0.043
Accuracy of the network on the test images: 88.40 %
[121] loss: 0.026
Accuracy of the network on the test images: 88.36 %
[122] loss: 0.039
Accuracy of the network on the test images: 88.17 %
[123] loss: 0.028
Accuracy of the network on the test images: 88.48 %
[124] loss: 0.031
Accuracy of the network on the test images: 87.93 %
[125] loss: 0.034
Accuracy of the network on the test images: 88.34 %
[126] loss: 0.040
Accuracy of the network on the test images: 88.13 %
[127] loss: 0.031
Accuracy of the network on the test images: 88.64 %
[128] loss: 0.028
Accuracy of the network on the test images: 88.18 %
[129] loss: 0.028
Accuracy of the network on the test images: 88.00 %
[130] loss: 0.034
Accuracy of the network on the test images: 88.54 %
[131] loss: 0.042
Accuracy of the network on the test images: 87.97 %
[132] loss: 0.027
Accuracy of the network on the test images: 88.38 %
[133] loss: 0.033
Accuracy of the network on the test images: 87.89 %
[134] loss: 0.027
Ac

In [40]:
import pandas as pd
from pathlib import Path

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
output_dir = Path('fashion_mnist_loss_accuracy')
output_dir.mkdir(parents=True, exist_ok=True)

# Disabled exports (same pattern, one row per run):
#   VFOGD_PF_*_Loss / VFOGD_PF_*_Accuracy  (orders 02..14) -> VFOGD_PF_Loss.csv / VFOGD_PF_Accuracy.csv
#   IFOGD_PF_*_Loss / IFOGD_PF_*_Accuracy  (orders 02..18) -> IFOGD_PF_Loss.csv / IFOGD_PF_Accuracy.csv
#   VFOAdam_PF_*_Loss                      (orders 02..14) -> VFOAdam_PF_Loss.csv

# One row per fractional-order run. Each row label sits next to the list it
# describes so labels and data cannot drift apart.
# NOTE: only the r = 0.2 history is produced by the training cell as written;
# the other lists must already exist from runs with the corresponding r,
# otherwise this cell raises NameError.
accuracy_by_run = {
    'VFOAdam_PF_02_Accuracy': VFOAdam_PF_02_Accuracy,
    'VFOAdam_PF_04_Accuracy': VFOAdam_PF_04_Accuracy,
    'VFOAdam_PF_06_Accuracy': VFOAdam_PF_06_Accuracy,
    'VFOAdam_PF_08_Accuracy': VFOAdam_PF_08_Accuracy,
    'VFOAdam_PF_10_Accuracy': VFOAdam_PF_10_Accuracy,
    'VFOAdam_PF_12_Accuracy': VFOAdam_PF_12_Accuracy,
    'VFOAdam_PF_14_Accuracy': VFOAdam_PF_14_Accuracy,
}

data = pd.DataFrame(list(accuracy_by_run.values()), index=list(accuracy_by_run.keys()))
data.to_csv(output_dir / 'VFOAdam_PF_Accuracy.csv')
