### 0.导入包

In [1]:
from scipy.optimize import minimize
import numpy as np
import pandas as pd

### 1.定义参数估计的类

In [2]:
class myclass():
    """Fit and score a linear-score, two-threshold classifier on one window.

    The model computes ``score = [1, x] @ b`` and maps it to a label:
    ``-1`` when ``score <= lower``, ``1`` when ``score >= upper`` and ``0``
    in between.  Parameters are laid out as
    ``[b0, ..., bk, lower, upper]``, where ``b0`` is the intercept.

    The input table is read positionally: column 1 is the label and columns
    2 onward are the explanatory variables (column 0 is not used here).

    Args:
        train_begin: 1-based index of the training window.  Each step moves
            the window by ``PERIODS_PER_YEAR`` rows.
        data: pandas DataFrame holding all rows.
        para_initial: iterable of starting parameter vectors, each of length
            ``number of explanatory variables + 3``.
        methods: iterable of ``scipy.optimize.minimize`` method names.

    Raises:
        ValueError: if the training window selected by ``train_begin``
            contains no rows.
    """

    # Window layout in rows.  NOTE(review): 12 rows per step and the "01"
    # suffix used in top10 suggest monthly data with yearly steps -- confirm
    # against the input file.
    PERIODS_PER_YEAR = 12
    TRAIN_PERIODS = 48
    TEST_PERIODS = 12
    # Added to train_begin to build the 'train_begin' label in top10.
    BASE_YEAR = 2014

    def __init__(self, train_begin, data, para_initial, methods):
        self.train_begin = train_begin

        start = (train_begin - 1) * self.PERIODS_PER_YEAR
        split = start + self.TRAIN_PERIODS
        self.Train_data = data[start:split]  # training rows
        self.Test_data = data[split:split + self.TEST_PERIODS]  # test rows

        if len(self.Train_data) == 0:
            # An empty window would make every loss 0/0 = NaN and the
            # optimiser would return meaningless parameters.
            raise ValueError(
                f"training window for train_begin={train_begin} is empty"
            )

        self.X = self._design_matrix(self.Train_data)  # intercept + regressors
        self.y = self.Train_data.iloc[:, 1].values  # observed labels
        self.para_initial = para_initial  # starting parameter vectors
        self.methods = methods  # optimisation algorithms to try

    @staticmethod
    def _design_matrix(frame):
        """Return ``[1, columns 2..]`` of *frame* as a 2-D array."""
        intercept = np.ones((frame.shape[0], 1))
        return np.concatenate((intercept, frame.iloc[:, 2:].values), axis=1)

    @staticmethod
    def _classify(scores, lower, upper):
        """Map raw scores to labels -1 / 0 / 1 using the two thresholds.

        Every comparison is made against the *original* scores.  Assigning
        labels in place and then re-testing the same array would let an
        already assigned ``-1`` fall inside ``(lower, upper)`` whenever
        ``lower < -1`` and be overwritten with ``0``.

        NaN scores stay NaN so they never count as a correct prediction.
        If a score satisfies both threshold tests, ``-1`` takes precedence.
        """
        scores = np.asarray(scores, dtype=float)
        return np.select(
            [np.isnan(scores), scores <= lower, scores >= upper],
            [np.nan, -1.0, 1.0],
            default=0.0,
        )

    def _predict(self, X, para):
        """Return predicted labels for design matrix *X* under *para*."""
        # Converting first makes positional access safe for lists, arrays
        # and label-indexed pandas Series alike.
        para = np.asarray(para, dtype=float)
        scores = np.dot(X, para[:X.shape[1]])
        return self._classify(scores, para[-2], para[-1])

    def myloss(self, para):
        """Return the training misclassification rate (1 - accuracy)."""
        predicted = self._predict(self.X, para)
        # loss = sum((predicted - y)**2)  # squared-error alternative
        return (predicted != self.y).sum() / len(self.y)

    def myoptimal(self):
        """Minimise ``myloss`` from every start with every method.

        Returns:
            DataFrame with one row per (method, start) pair containing the
            starting values (``b{i}initial``, ``lowerinitial``,
            ``upperinitial``), the fitted values (``b{i}``, ``lower``,
            ``upper``), ``train_acc`` (``1 - loss``) and ``method``.
        """
        n_coef = self.X.shape[1]
        # Coefficients are unbounded; the thresholds are kept on opposite
        # sides of zero.
        bounds = ((None, None),) * n_coef + ((-10, -0.01), (0.01, 10))

        rows = []
        for method in self.methods:
            for para0 in self.para_initial:
                res = minimize(self.myloss, para0, method=method, bounds=bounds)
                rows.append(list(para0) + list(res.x) + [1 - res.fun, method])

        coef_cols = [f'b{i}' for i in range(n_coef)]
        columns = (
            [f'{name}initial' for name in coef_cols]
            + ['lowerinitial', 'upperinitial']
            + coef_cols
            + ['lower', 'upper', 'train_acc', 'method']
        )
        return pd.DataFrame(data=rows, columns=columns)

    def testacc(self, para_hat):
        """Return the accuracy of *para_hat* on the test window.

        Args:
            para_hat: parameter vector ``[b0, ..., bk, lower, upper]`` as a
                list, array or pandas Series (read positionally).
        """
        ytest = self.Test_data.iloc[:, 1].values
        Xtest = self._design_matrix(self.Test_data)
        predicted = self._predict(Xtest, para_hat)
        return (predicted == ytest).sum() / len(ytest)

    def top10(self):
        """Return the 10 fits with the highest training accuracy.

        Each returned row also carries its test-window accuracy
        (``test_acc``) and a ``train_begin`` label built as
        ``str(train_begin + BASE_YEAR) + '01'``.
        """
        results = self.myoptimal()
        coef_cols = [f'b{i}' for i in range(self.X.shape[1])]
        param_cols = coef_cols + ['lower', 'upper']

        # Copy so the column assignments below act on an independent frame.
        top = results.nlargest(10, 'train_acc')[param_cols + ['train_acc']].copy()
        top['test_acc'] = [
            self.testacc(row) for row in top[param_cols].to_numpy(dtype=float)
        ]
        top['train_begin'] = str(self.train_begin + self.BASE_YEAR) + '01'
        return top

    

### 2.导入数据、设置参数初值

In [3]:
# Load the first worksheet of the input workbook.
data = pd.read_excel("input.xlsx", sheet_name=0)

# Fixed seed so the same starting points are generated on every run.
random = np.random.RandomState(0)
n = 300  # number of random starting points
numX = data.shape[1] - 2  # number of explanatory variables

# One row per starting point: numX + 1 integer coefficients drawn from
# [-20, 20], followed by a lower threshold from [-10, -0.01) and an upper
# threshold from [0.01, 10).
para_initial = np.hstack([
    random.randint(-20, 21, (n, numX + 1)),
    random.uniform(-10, -0.01, size=(n, 1)),
    random.uniform(0.01, 10, size=(n, 1)),
])

# Optimisation algorithms tried from every starting point.
methods = ['Nelder-Mead', 'Powell']

### 3.计算最优准确率下的参数估计值，并输入到excel文件

In [4]:
# Fit one model per training-window start and store its top-10 table,
# keyed by the window index rendered as a string.
RES = {}
for train_begin in (1, 2, 3):
    a = myclass(train_begin, data, para_initial, methods)
    RES[str(train_begin)] = a.top10()


In [5]:
# Write each result table to its own worksheet of output.xlsx, omitting
# the DataFrame index.
with pd.ExcelWriter('output.xlsx') as EW:
    for key, sheet in (
        ('1', "训练集开始时间201501"),
        ('2', "训练集开始时间201601"),
        ('3', "训练集开始时间201701"),
    ):
        RES[key].to_excel(EW, sheet_name=sheet, index=False)

