### GAN 和 Recurrent GAN 的结构

#### GAN:

![title](./GAN.PNG)

#### 利用GAN生成历史数据

![title](./stock_data_gen.PNG)

### 利用MLP产生一组随机的时间序列（Generator）

In [83]:
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.normal import Normal

In [84]:
# 产生用于输入Generator的噪声, 为正态分布
# 其大小为 m x n

def get_noise_data(m, n):
    dist = Normal(0, 1)
    return dist.sample((m, n)).requires_grad_()

In [85]:
class Generator(nn.Module):
    def __init__(self, input_size=20, num_features=4, batch_size=10, seq_len=26):
        super().__init__()
        self.input_size = input_size   # nums of input rand numbers
        self.num_features = num_features
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.output_size = self.num_features * self.seq_len
        
        # 使用MLP
        self.fc1 = nn.Linear(self.input_size, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, self.output_size)
        
    def forward(self, input_data):
        output = torch.sigmoid(self.fc1(input_data))
        output = torch.sigmoid(self.fc2(output))
        output = torch.sigmoid(self.fc3(output))
        
        # output size: [batch_size, output_size]
        return output

In [86]:
input_size = 20
batch_size = 10
seq_len = 26

g = Generator(input_size=input_size, batch_size=batch_size, seq_len=seq_len)

In [87]:
input_data = get_noise_data(batch_size, input_size)

time_series = g.forward(input_data)

time_series.shape

torch.Size([10, 104])

### 下面是利用MLP构造的Discriminator

In [88]:
class Discriminator(nn.Module):
    def __init__(self, batch_size=10, seq_len=26, num_features=4):
        super().__init__()
        self.input_size = seq_len * num_features
        self.batch_size = batch_size
        self.seq_len = seq_len
        
        # 使用 MLP
        self.fc1 = nn.Linear(self.input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 1)
        
    def forward(self, input_data):
        output = torch.sigmoid(self.fc1(input_data))
        output = torch.sigmoid(self.fc2(output))
        output = torch.sigmoid(self.fc3(output))
        output = torch.sigmoid(self.fc4(output))
        
        return output

In [89]:
# 一起测试Generator和Discriminator
input_size = 20
batch_size = 1
seq_len = 26

g = Generator(input_size=input_size, batch_size=batch_size, seq_len=seq_len)
input_data = get_noise_data(batch_size, input_size)
time_series = g.forward(input_data)

print(time_series.shape)

d = Discriminator(batch_size=batch_size, seq_len=seq_len)
outputs = d.forward(time_series)

print(outputs)

torch.Size([1, 104])
tensor([[0.4655]], grad_fn=<SigmoidBackward>)


### 获取真实样本

In [90]:
# 从csv文件中读取数据，将数据划分为训练集和测试集
# 再从训练集中随机抽取m个长度为n的串返回

data = pd.read_csv('stock_data_730.csv')

data.set_index(["date"], inplace=True)
data_sorted = data.sort_index()

In [91]:
# 将所有数据分割为训练集和测试集
# 与 LSTM -- predict stock price中的函数相同

def train_test_split(data, SEQ_LENGTH = 26, test_prop=0.137):  # 0.11 for 1095, 0.137 for 730, 0.3 for 365
    
    ntrain = int(len(data) *(1-test_prop))  # len(data) = 197
    predictors = data.columns[:4]  # open, high, close, low
    data_pred = data[predictors]
    num_attr = data_pred.shape[1]  # 4
    
    result = np.empty((len(data) - SEQ_LENGTH, SEQ_LENGTH, num_attr))
    y = np.empty((len(data) - SEQ_LENGTH, SEQ_LENGTH))
    yopen = np.empty((len(data) - SEQ_LENGTH, SEQ_LENGTH))

    for index in range(len(data) - SEQ_LENGTH):
        result[index, :, :] = data_pred[index: index + SEQ_LENGTH]
        y[index, :] = data_pred[index+1: index + SEQ_LENGTH + 1].close
        yopen[index, :] = data_pred[index+1: index + SEQ_LENGTH + 1].open

    """
        xtrain的大小：ntrain x SEQ_LENGTH x 4
        ytrain的大小：ntrain x SEQ_LENGTH
        
        * xtrain的每个batch为长为SEQ_LENGTH的连续序列，一共有ntrain个batch，
          序列中每个单元都是一个四元组（open，high，close，low）
        * ytrain的每个batch为长为SEQ_LENGTH的连续序列，一共有ntrain个batch，
          序列中每个单元是xtrain中对应四元组所在日期的下一天的close price
        
        xtest 的大小：    ntest x SEQ_LENGTH x 4                
        ytest的大小：     ntest x SEQ_LENGTH      (close price)
        ytest_open的大小：ntest x SEQ_LENGTH      (open price)  
        
        * xtest的每个batch为长为SEQ_LENGTH的连续序列，一共有ntest个batch，
          序列中每个单元都是一个四元组（open，high，close，low）
          每一个序列仅包含一个新四元组，且在最后一个
        * ytest的每个batch为长为SEQ_LENGTH的连续序列，一共有ntest个batch，
          序列中每个单元是xtest中对应四元组所在日期的下一天的close price
        
        类型：numpy.ndarray
    """
    xtrain = result[:ntrain, :, :]
    ytrain = y[:ntrain]
    
    xtest = result[ntrain:, :, :]
    ytest = y[ntrain:]
    ytest_open = yopen[ntrain:]
    
    return xtrain, xtest, ytrain, ytest, ytest_open

In [92]:
xtrain, xtest, ytrain, ytest, ytest_open = train_test_split(data_sorted)  # 只需要xtrain

In [93]:
xtrain.shape  # open, high, close, low

xtrain_mean = np.mean(xtrain)
xtrain_max = np.max(xtrain)
xtrain_min = np.min(xtrain)

print(xtrain_mean, xtrain_max, xtrain_min)

15.418522797766748 21.26 9.86


In [94]:
# 获取下标从start_idx开始的连续batch_size个序列

def get_real_samples(idx, batch_size, data=xtrain):
    data = data[idx:idx+batch_size, :]
    min_val = np.min(data)
    max_val = np.max(data)
    mean_val = np.mean(data)
    data = (data-mean_val)/(max_val-min_val)
    
    data = torch.from_numpy(data).float()
    data = data.view(batch_size, -1)
    
    return data.requires_grad_()

In [95]:
get_real_samples(0, 1).shape

torch.Size([1, 104])

In [96]:
# 将real samples带入Discriminator中进行测试

batch_size = 10
seq_len = 26

real_samples = get_real_samples(0, batch_size)

print(real_samples.shape)

d = Discriminator(batch_size=batch_size, seq_len=seq_len)
outputs = d.forward(real_samples)

print(outputs)

torch.Size([10, 104])
tensor([[0.5846],
        [0.5846],
        [0.5845],
        [0.5845],
        [0.5845],
        [0.5845],
        [0.5844],
        [0.5844],
        [0.5844],
        [0.5844]], grad_fn=<SigmoidBackward>)


### 构造GAN

In [97]:
d_learning_rate = 0.01
g_learning_rate = 0.01

input_size = 20
batch_size = 5
seq_len = 26

G = Generator(input_size=input_size, batch_size=batch_size, seq_len=seq_len)
D = Discriminator(batch_size=batch_size, seq_len=seq_len)

d_optimizer = optim.SGD(D.parameters(), lr=d_learning_rate) 
g_optimizer = optim.SGD(G.parameters(), lr=g_learning_rate)

In [98]:
num_epochs = 500

for epoch in range(num_epochs):
    
    G.train()
    D.train()
    
    sum_real_error = 0
    sum_gen_loss = 0
    
    start_time = time.time()
    
    for idx in range(0, len(xtrain)-batch_size+1, batch_size):
        
        """
            训练Discriminator
        """
        D.zero_grad()
        # 用真实样本训练D 
        real_data = get_real_samples(idx, batch_size)
        real_decision = D(real_data)
        real_error = -torch.sum(torch.log(real_decision))/batch_size
        sum_real_error += real_error
        real_error.backward()
    
        # 用生成样本训练D
        input_data = get_noise_data(batch_size, input_size)
        fake_data = G.forward(input_data) 
        fake_decision = D(fake_data)
        fake_error = 1 - torch.sum(torch.log(fake_decision))/batch_size
        fake_error.backward()
    
        d_optimizer.step()
    
    
        """
            训练Generator
        """
        G.zero_grad()
        # 训练G
        input_data = get_noise_data(batch_size, input_size)
        fake_data = G.forward(input_data)
        fake_decision = D(fake_data)
        gen_loss = -torch.sum(torch.log(fake_decision))/batch_size
        sum_gen_loss += gen_loss
        gen_loss.backward()

        g_optimizer.step()
    
    print("Epoch %5d:  D Loss  %5.3f  " % (epoch, sum_real_error), end="")
    print("G Loss  %5.3f  " % (sum_gen_loss), end="")
    print("duration: %5f" %(time.time()-start_time))

Epoch     0:  D Loss  6.216  G Loss  5.506  duration: 0.209405
Epoch     1:  D Loss  0.933  G Loss  0.922  duration: 0.204474
Epoch     2:  D Loss  0.529  G Loss  0.525  duration: 0.230384
Epoch     3:  D Loss  0.369  G Loss  0.366  duration: 0.201478
Epoch     4:  D Loss  0.282  G Loss  0.280  duration: 0.203907
Epoch     5:  D Loss  0.228  G Loss  0.227  duration: 0.195495
Epoch     6:  D Loss  0.191  G Loss  0.190  duration: 0.204188
Epoch     7:  D Loss  0.164  G Loss  0.163  duration: 0.201815
Epoch     8:  D Loss  0.144  G Loss  0.143  duration: 0.203459
Epoch     9:  D Loss  0.128  G Loss  0.127  duration: 0.211434
Epoch    10:  D Loss  0.115  G Loss  0.114  duration: 0.197472
Epoch    11:  D Loss  0.104  G Loss  0.104  duration: 0.205986
Epoch    12:  D Loss  0.096  G Loss  0.095  duration: 0.201302
Epoch    13:  D Loss  0.088  G Loss  0.088  duration: 0.202085
Epoch    14:  D Loss  0.082  G Loss  0.081  duration: 0.205161
Epoch    15:  D Loss  0.076  G Loss  0.076  duration: 0

Epoch   131:  D Loss  0.008  G Loss  0.008  duration: 0.285774
Epoch   132:  D Loss  0.008  G Loss  0.008  duration: 0.289350
Epoch   133:  D Loss  0.008  G Loss  0.008  duration: 0.288238
Epoch   134:  D Loss  0.008  G Loss  0.008  duration: 0.291479
Epoch   135:  D Loss  0.008  G Loss  0.008  duration: 0.288331
Epoch   136:  D Loss  0.008  G Loss  0.008  duration: 0.283194
Epoch   137:  D Loss  0.008  G Loss  0.008  duration: 0.287470
Epoch   138:  D Loss  0.008  G Loss  0.007  duration: 0.291225
Epoch   139:  D Loss  0.007  G Loss  0.007  duration: 0.288110
Epoch   140:  D Loss  0.007  G Loss  0.007  duration: 0.286235
Epoch   141:  D Loss  0.007  G Loss  0.007  duration: 0.285439
Epoch   142:  D Loss  0.007  G Loss  0.007  duration: 0.291230
Epoch   143:  D Loss  0.007  G Loss  0.007  duration: 0.284239
Epoch   144:  D Loss  0.007  G Loss  0.007  duration: 0.286484
Epoch   145:  D Loss  0.007  G Loss  0.007  duration: 0.289044
Epoch   146:  D Loss  0.007  G Loss  0.007  duration: 0

Epoch   262:  D Loss  0.004  G Loss  0.004  duration: 0.294011
Epoch   263:  D Loss  0.004  G Loss  0.004  duration: 0.299491
Epoch   264:  D Loss  0.004  G Loss  0.004  duration: 0.319579
Epoch   265:  D Loss  0.004  G Loss  0.004  duration: 0.311388
Epoch   266:  D Loss  0.004  G Loss  0.004  duration: 0.290977
Epoch   267:  D Loss  0.004  G Loss  0.004  duration: 0.288082
Epoch   268:  D Loss  0.004  G Loss  0.004  duration: 0.290050
Epoch   269:  D Loss  0.004  G Loss  0.004  duration: 0.287780
Epoch   270:  D Loss  0.004  G Loss  0.004  duration: 0.285045
Epoch   271:  D Loss  0.004  G Loss  0.004  duration: 0.283221
Epoch   272:  D Loss  0.004  G Loss  0.004  duration: 0.283968
Epoch   273:  D Loss  0.004  G Loss  0.004  duration: 0.288416
Epoch   274:  D Loss  0.004  G Loss  0.004  duration: 0.288271
Epoch   275:  D Loss  0.004  G Loss  0.004  duration: 0.298203
Epoch   276:  D Loss  0.004  G Loss  0.004  duration: 0.283803
Epoch   277:  D Loss  0.004  G Loss  0.004  duration: 0

Epoch   393:  D Loss  0.002  G Loss  0.002  duration: 0.299647
Epoch   394:  D Loss  0.002  G Loss  0.002  duration: 0.293106
Epoch   395:  D Loss  0.002  G Loss  0.002  duration: 0.308527
Epoch   396:  D Loss  0.002  G Loss  0.002  duration: 0.298208
Epoch   397:  D Loss  0.002  G Loss  0.002  duration: 0.300198
Epoch   398:  D Loss  0.002  G Loss  0.002  duration: 0.297093
Epoch   399:  D Loss  0.002  G Loss  0.002  duration: 0.295214
Epoch   400:  D Loss  0.002  G Loss  0.002  duration: 0.298519
Epoch   401:  D Loss  0.002  G Loss  0.002  duration: 0.308219
Epoch   402:  D Loss  0.002  G Loss  0.002  duration: 0.297494
Epoch   403:  D Loss  0.002  G Loss  0.002  duration: 0.336351
Epoch   404:  D Loss  0.002  G Loss  0.002  duration: 0.301194
Epoch   405:  D Loss  0.002  G Loss  0.002  duration: 0.302702
Epoch   406:  D Loss  0.002  G Loss  0.002  duration: 0.303210
Epoch   407:  D Loss  0.002  G Loss  0.002  duration: 0.296207
Epoch   408:  D Loss  0.002  G Loss  0.002  duration: 0

### 利用Generator生成时间序列

In [120]:
gen_batch_size = 100

input_data = get_noise_data(gen_batch_size, input_size)
time_series = G.forward(input_data)

In [121]:
time_series = time_series.view(gen_batch_size, -1, 4).detach().numpy()

In [122]:
time_series = time_series * (xtrain_max - xtrain_min) + xtrain_mean
time_series.shape

(100, 26, 4)

In [123]:
np.save('./time_series.npy', time_series)