In [9]:
import tensorflow as tf
import numpy as np
import pymysql
from datetime import date, timedelta


DB_IP = '192.168.1.210'
DB_USER = 'root'
DB_PWD = '1234'
DB_SCH = 'data'
DB_ENC = 'utf8mb4'
LIMIT_FILTER = 0.70

INPUT_VEC_SIZE = LSTM_SIZE = 7
TIME_STEP_SIZE = 60
LABEL_SIZE = 3
LSTM_DEPTH = 4

BATCH_SIZE = 15000
TRAIN_CNT = 100

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

In [10]:
class DBManager :
    def __init__(self):
        self.DB_IP = '192.168.1.210'
        self.DB_USER = 'root'
        self.DB_PWD = '1234'
        self.DB_SCH = 'data'
        self.DB_ENC = 'utf8mb4'
        self.conn = self.get_new_conn()
        
    def __del__(self):
        self.conn.close()
    def get_new_conn(self):
        return pymysql.connect(host=self.DB_IP, user=self.DB_USER, password=self.DB_PWD, db=self.DB_SCH, charset=self.DB_ENC)
    def get_codedates(self, code, limit):    
        query = "SELECT date FROM data.daily_stock WHERE code = %s AND date <= %s ORDER BY date ASC"
        cursor = self.conn.cursor()
        cursor.execute(query, (code, limit))
        code_dates = list()        
        dates = cursor.fetchall()
        for date in dates:
            code_dates.append((code, date[0]))
        return code_dates
    def get_items(self, code, date, limit):
        query = "SELECT open, high, low, close, volume, hold_foreign, st_purchase_inst FROM data.daily_stock WHERE code = %s AND date >= %s ORDER BY date ASC LIMIT %s"
        cursor = self.conn.cursor()
        cursor.execute(query, (code, date, limit))
        items = cursor.fetchall()        
        return items
    
    def get_codes(self):
        query = "SELECT DISTINCT code FROM data.daily_stock"
        cursor = self.conn.cursor()
        cursor.execute(query)
        return cursor.fetchall()
    def insert_result(self, expect, code, analyze_at, potential, evaluate, volume) :
        if self.check_exist(expect, code, analyze_at, evaluate):
            print('duplicate', expect, code, analyze_at)
        else :
            cursor = self.conn.cursor()
            print(expect,code,analyze_at,potential,volume,evaluate)
            cursor.execute("INSERT INTO forecast (type, code, analyzeAt, potential, volume, evaluate) VALUES (%s, %s, %s, %s, %s, %s)",
                           (expect, code, analyze_at, str(potential), volume, evaluate))
            self.conn.commit()
    def check_exist(self, expect, code, analyze_at, evaluate):
        cursor = self.conn.cursor()
        cursor.execute("SELECT count(*) as cnt FROM forecast WHERE type = %s AND code = %s AND analyzeAt = %s AND evaluate = %s", (expect, code, analyze_at, evaluate))
        cnt = cursor.fetchone()
        return cnt[0] > 0
    def get_volume(self, code, limit_at):
        cursor = self.conn.cursor()
        cursor.execute("SELECT count(*) as cnt FROM daily_stock WHERE code = %s AND date <= %s", (code, limit_at))
        cnt = cursor.fetchone()
        return cnt[0]
        
    

In [11]:
def model(code, X, W, B, lstm_size):
    XT = tf.transpose(X, [1, 0, 2]) 
    XR = tf.reshape(XT, [-1, lstm_size])
    X_split = tf.split(0, TIME_STEP_SIZE, XR)
    with tf.variable_scope(code, reuse=False):
        cell = tf.nn.rnn_cell.GRUCell(lstm_size)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell = cell, output_keep_prob = 0.5)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * LSTM_DEPTH, state_is_tuple = True)

    outputs, _states = tf.nn.rnn(cell, X_split, dtype=tf.float32)

    return tf.matmul(outputs[-1], W) + B, cell.state_size # State size to initialize the stat


In [16]:
def read_series_datas(db, code_dates):
    X = list()
    Y = list()
    for code_date in code_dates:
        items = db.get_items(code_date[0], code_date[1], TIME_STEP_SIZE + EVALUATE_SIZE)
  
        if len(items) < (EVALUATE_SIZE + TIME_STEP_SIZE):
            break
        X.append(np.array(items[:TIME_STEP_SIZE]))

        st_purchase_inst = items[-(EVALUATE_SIZE + 1)][EXPECT]
        if st_purchase_inst == 0:
            continue
        for i in range(EVALUATE_SIZE, len(items) - EVALUATE_SIZE):
            eval_inst = items[i][EXPECT]
            eval_bef = items[EVALUATE_SIZE-i][EXPECT]
            if eval_bef < eval_inst:
                eval_bef = eval_inst           
        
        if (eval_bef - st_purchase_inst) / st_purchase_inst < -0.02: #percent ? cnt ? 
            Y.append((0., 0., 1.))
        elif (eval_bef - st_purchase_inst) / st_purchase_inst > 0.03:
            Y.append((1., 0., 0.))
        else:
            Y.append((0., 1., 0.))


    arrX = np.array(X)    
    meanX = np.mean(arrX, axis = 0)
    stdX = np.std(arrX, axis = 0)
    norX = (arrX - meanX) / stdX
    norY = np.array(Y)
    return norX, norY


In [17]:
def read_datas(db, code_dates):    
    np.random.seed()
    np.random.shuffle(code_dates)

    trX = list()
    trY = list()
    trX, trY = read_series_datas(db, code_dates)
    teX, teY = read_series_datas(db, code_dates)

    return trX, trY, teX, teY

In [18]:
def analyze(code, limit):      
    db = DBManager()
    code_dates = db.get_codedates(code, limit)
    tf.reset_default_graph()    
    last = code_dates[-1][1]
    trX, trY, teX, teY = read_datas(db, code_dates)
    if (len(trX) == 0):
        return None

    X = tf.placeholder(tf.float32, [None, TIME_STEP_SIZE, INPUT_VEC_SIZE])
    Y = tf.placeholder(tf.float32, [None, LABEL_SIZE])

    W = init_weights([LSTM_SIZE, LABEL_SIZE])
    B = init_weights([LABEL_SIZE])

    py_x, state_size = model(code, X, W, B, LSTM_SIZE)

    loss = tf.nn.softmax_cross_entropy_with_logits(py_x, Y)
    cost = tf.reduce_mean(loss)
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    predict_op = tf.argmax(py_x, 1)

    # Launch the graph in a session
    analyzed = None
    with tf.Session() as sess:
        # you need to initialize all variables
        tf.initialize_all_variables ().run()

        for loop in range(TRAIN_CNT):
            for start, end in zip(range(0, len(trX), BATCH_SIZE), range(BATCH_SIZE, len(trX)+1, BATCH_SIZE)):
                sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})

            test_indices = np.arange(len(teY))
            org = teY[test_indices] ## fixfix
            res = sess.run(predict_op, feed_dict={X: teX[test_indices], Y: teY[test_indices]})
            
            if loop == TRAIN_CNT-1 :
                result = np.mean(np.argmax(org, axis=1) == res)                
                analyzed = {"code":code, "per":round(result, 2), "date":limit}
                print(analyzed)
    return analyzed


In [19]:
limit = '2017-02-19'
EXPECT = 6 ##open, high, low, close, volume, hold_foreign, st_purchase_inst
EVALUATE_SIZE = 3

codes = DBManager().get_codes()
for code in codes : 
    analyzed = analyze(code[0], limit)
    if analyzed is not None and analyzed["per"] > LIMIT_FILTER:
        db = DBManager()
        volume = db.get_volume(analyzed["code"], limit)
        db.insert_result(EXPECT, analyzed["code"], limit, analyzed["per"], EVALUATE_SIZE, volume)        
        print('insert result ', analyzed, volume)

print('done')

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


{'code': 'A000050', 'per': 0.5, 'date': '2017-02-19'}
{'code': 'A000070', 'per': 0.27000000000000002, 'date': '2017-02-19'}
{'code': 'A000080', 'per': 0.59999999999999998, 'date': '2017-02-19'}
{'code': 'A000100', 'per': 0.46000000000000002, 'date': '2017-02-19'}
{'code': 'A000120', 'per': 0.23999999999999999, 'date': '2017-02-19'}
{'code': 'A000140', 'per': 0.48999999999999999, 'date': '2017-02-19'}
{'code': 'A000150', 'per': 0.14000000000000001, 'date': '2017-02-19'}




{'code': 'A000210', 'per': 0.0, 'date': '2017-02-19'}
{'code': 'A000230', 'per': 0.050000000000000003, 'date': '2017-02-19'}
{'code': 'A000270', 'per': 0.12, 'date': '2017-02-19'}
{'code': 'A000640', 'per': 0.32000000000000001, 'date': '2017-02-19'}
{'code': 'A000660', 'per': 0.46000000000000002, 'date': '2017-02-19'}
{'code': 'A000670', 'per': 0.14000000000000001, 'date': '2017-02-19'}
{'code': 'A000720', 'per': 0.31, 'date': '2017-02-19'}
{'code': 'A000810', 'per': 0.20999999999999999, 'date': '2017-02-19'}
{'code': 'A000880', 'per': 0.11, 'date': '2017-02-19'}
{'code': 'A000990', 'per': 0.14999999999999999, 'date': '2017-02-19'}
{'code': 'A001040', 'per': 0.22, 'date': '2017-02-19'}
{'code': 'A001060', 'per': 0.44, 'date': '2017-02-19'}
{'code': 'A001120', 'per': 0.5, 'date': '2017-02-19'}
{'code': 'A001230', 'per': 0.47999999999999998, 'date': '2017-02-19'}
{'code': 'A001430', 'per': 0.59999999999999998, 'date': '2017-02-19'}
{'code': 'A001450', 'per': 0.62, 'date': '2017-02-19'}
{

IndexError: index 79 is out of bounds for axis 0 with size 79