In [123]:
# coding=utf8

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [124]:
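# 1. Load the data
# 2. Basic data processing
# 2.1 Handle missing values
# 2.2 Select the feature values and the target value
# 2.3 Split the data
# 3. Feature engineering (standardization)
# 4. Machine learning (logistic regression)
# 5. Model evaluation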

In [125]:
# 1. Load the data
# Read only the four columns used in this notebook; the file is GBK-encoded.
df = pd.read_csv(
    'data/交易明细.csv',
    sep=',',
    usecols=['时间', '方向', '开仓均价', '类别'],
    encoding='gbk',
)
# Parse timestamps such as "2017/8/16 18:10" with an explicit format.
# This is done after loading instead of through read_csv's `date_parser`
# argument, which has been deprecated since pandas 2.0; the resulting column
# is datetime64[ns] either way.
df['时间'] = pd.to_datetime(df['时间'], format='%Y/%m/%d %H:%M')
print(df.dtypes)
df

时间      datetime64[ns]
方向               int64
开仓均价             int64
类别               int64
dtype: object


Unnamed: 0,时间,方向,开仓均价,类别
0,2017-08-16 18:10:00,-1,1723,0
1,2017-08-18 13:30:00,1,1726,1
2,2017-08-18 13:50:00,-1,1732,0
3,2017-08-18 14:00:00,1,1735,0
4,2017-08-21 13:25:00,-1,1732,0
...,...,...,...,...
495,2018-12-13 13:10:00,1,1868,1
496,2018-12-13 13:25:00,-1,1869,0
497,2018-12-13 14:05:00,1,1872,0
498,2018-12-13 14:45:00,-1,1870,0


In [126]:
# 2. Basic data processing
# 2.1 Handle missing values
# 2.2 Select the feature values and the target value

In [127]:
# Feature matrix: select the two feature columns by name instead of by
# position, so the selection does not silently change if the column order of
# the CSV changes (the target column is likewise selected by name).
x = df[['方向', '开仓均价']]
x.head()

Unnamed: 0,方向,开仓均价
0,-1,1723
1,1,1726
2,-1,1732
3,1,1735
4,-1,1732


In [128]:
# Target vector: the class-label column of the loaded frame.
y = df.loc[:, "类别"]
y.head()

0    0
1    1
2    0
3    0
4    0
Name: 类别, dtype: int64

In [129]:
# 2.3 Split the data
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [130]:
# 3. Feature engineering (standardization)
transfer = StandardScaler()
# Learn the per-feature mean and standard deviation from the training split
# only, and scale the training split with them.
x_train = transfer.fit_transform(x_train)
# Scale the test split with the statistics learned from the training split.
# Calling fit_transform here would re-fit the scaler on the test data, so the
# two splits would be scaled inconsistently and the model would be evaluated
# on features in a different coordinate system than it was trained on.
x_test = transfer.transform(x_test)

In [132]:
# Sanity check: show the (rows, columns) shape of the scaled training features.
x_train.shape

(400, 2)

In [133]:
# 4. Machine learning (logistic regression)
# Build a logistic-regression classifier with the library defaults, then fit
# it on the standardized training features and their labels.
estimator = LogisticRegression()
estimator.fit(X=x_train, y=y_train)

LogisticRegression()

In [134]:
# 5. Model evaluation
# Predicted class labels for the held-out test split.
y_pre = estimator.predict(x_test)
print("预测值：\n", y_pre)

# Mean accuracy of the classifier on the test split.
score = estimator.score(x_test, y_test)
print("准确率是：\n", score)

# Accuracy on its own can hide a degenerate model: a classifier that always
# predicts the most frequent class scores exactly that class's share of the
# test labels. Print that share as a baseline so the accuracy above can be
# judged against it.
baseline = y_test.value_counts(normalize=True).max()
print("多数类基线准确率：\n", baseline)


预测值：
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
准确率是：
 0.79
