In [1]:
import tensorflow as tf
import numpy as np
import pymysql

DB_IP = '192.168.1.210'
DB_USER = 'root'
DB_PWD = '1234'
DB_SCH = 'data'
DB_ENC = 'utf8mb4'
LIMIT_FILTER = 0.70

INPUT_VEC_SIZE = LSTM_SIZE = 7
TIME_STEP_SIZE = 60
LABEL_SIZE = 3
EVALUATE_SIZE = 3
LSTM_DEPTH = 4

BATCH_SIZE = 15000
TRAIN_CNT = 100
conn = pymysql.connect(host=DB_IP, user=DB_USER, password=DB_PWD, db=DB_SCH, charset=DB_ENC)


def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

In [2]:
def get_codedates(code):
    query = "SELECT date FROM data.daily_stock WHERE code = %s ORDER BY date ASC"
    try :
        cursor = conn.cursor()
        cursor.execute(query, (code))
    except OperationalError:
        set_connect()
        cursor = conn.cursor()
        cursor.execute(query, (code))
        
    code_dates = list()        
    dates = cursor.fetchall()
    for date in dates:
        code_dates.append((code, date[0]))    
    return code_dates

def get_items(code, date, limit):
    query = "SELECT open, high, low, close, volume, hold_foreign, st_purchase_inst FROM data.daily_stock WHERE code = %s AND date >= %s ORDER BY date ASC LIMIT %s"
    try :
        cursor = conn.cursor()
        cursor.execute(query, (code, date, limit))
    except OperationalError:
        set_connect()
        cursor = conn.cursor()
        cursor.execute(query, (code, date, limit))
    return cursor.fetchall()  
    
def get_codes():
    query = "SELECT DISTINCT code FROM data.daily_stock"
    try :
        cursor = conn.cursor()
        cursor.execute(query)
    except pymysql.err.OperationalError as e:
        print('lost connection ', e)
        set_connect()
        cursor = conn.cursor()
        cursor.execute(query)
    return cursor.fetchall()
    
    
def set_connect():
    print('reset connect')
    conn = pymysql.connect(host=DB_IP, user=DB_USER, password=DB_PWD, db=DB_SCH, charset=DB_ENC)
    
    

In [3]:
def model(code, X, W, B, lstm_size):
    XT = tf.transpose(X, [1, 0, 2]) 
    XR = tf.reshape(XT, [-1, lstm_size])
    X_split = tf.split(0, TIME_STEP_SIZE, XR)
    with tf.variable_scope(code, reuse=False):
        cell = tf.nn.rnn_cell.GRUCell(lstm_size)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell = cell, output_keep_prob = 0.5)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * LSTM_DEPTH, state_is_tuple = True)

    outputs, _states = tf.nn.rnn(cell, X_split, dtype=tf.float32)

    return tf.matmul(outputs[-1], W) + B, cell.state_size # State size to initialize the stat


In [4]:
def read_series_datas(code_dates):
    X = list()
    Y = list()
    for code_date in code_dates:
        items = get_items(code_date[0], code_date[1], TIME_STEP_SIZE + EVALUATE_SIZE)
  
        if len(items) < (EVALUATE_SIZE + TIME_STEP_SIZE):
            break
        X.append(np.array(items[:TIME_STEP_SIZE]))

        st_purchase_inst = items[-(EVALUATE_SIZE + 1)][6]
        if st_purchase_inst == 0:
            continue
        for i in range(EVALUATE_SIZE, len(items) - EVALUATE_SIZE):
            eval_inst = items[i][6]
            eval_bef = items[EVALUATE_SIZE-i][6]
            if eval_bef < eval_inst:
                eval_bef = eval_inst           
        
        if (eval_bef - st_purchase_inst) / st_purchase_inst < -0.02: #percent ? cnt ? 
            Y.append((0., 0., 1.))
        elif (eval_bef - st_purchase_inst) / st_purchase_inst > 0.04:
            Y.append((1., 0., 0.))
        else:
            Y.append((0., 1., 0.))


    arrX = np.array(X)    
    meanX = np.mean(arrX, axis = 0)
    stdX = np.std(arrX, axis = 0)
    norX = (arrX - meanX) / stdX
    norY = np.array(Y)
    return norX, norY


In [5]:
def read_datas(code_dates):    
    np.random.seed()
    np.random.shuffle(code_dates)

    trX = list()
    trY = list()
    trX, trY = read_series_datas(code_dates)
    teX, teY = read_series_datas(code_dates)

    return trX, trY, teX, teY

In [6]:
def send_msg(msg):
    print(msg)
    

In [7]:
filtered = list()
for code in get_codes():
    tf.reset_default_graph()
    code_dates = get_codedates(code[0])
    
    last = code_dates[-1]
    trX, trY, teX, teY = read_datas(code_dates)
    if (len(trX) == 0):
        continue

    X = tf.placeholder(tf.float32, [None, TIME_STEP_SIZE, INPUT_VEC_SIZE])
    Y = tf.placeholder(tf.float32, [None, LABEL_SIZE])

    W = init_weights([LSTM_SIZE, LABEL_SIZE])
    B = init_weights([LABEL_SIZE])

    py_x, state_size = model(code[0], X, W, B, LSTM_SIZE)

    loss = tf.nn.softmax_cross_entropy_with_logits(py_x, Y)
    cost = tf.reduce_mean(loss)
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    predict_op = tf.argmax(py_x, 1)

    # Launch the graph in a session
    with tf.Session() as sess:
        # you need to initialize all variables
        tf.global_variables_initializer().run()

        for loop in range(TRAIN_CNT):
            for start, end in zip(range(0, len(trX), BATCH_SIZE), range(BATCH_SIZE, len(trX)+1, BATCH_SIZE)):
                sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})

            test_indices = np.arange(len(teX))
            org = teY[test_indices]
            res = sess.run(predict_op, feed_dict={X: teX[test_indices], Y: teY[test_indices]})
            
            if loop == TRAIN_CNT-1 :
                result = np.mean(np.argmax(org, axis=1) == res)
                print(last, loop, result)
                if result > LIMIT_FILTER:
                    filtered.append({"per":result, "stock":last})
                    

send_msg(filtered)
# 이전데이터로 확인
# 1,2,3 
# 결과후 저장

('A000030', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.72
('A000050', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.428571428571
('A000070', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.0
('A000080', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.459016393443
('A000100', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.166666666667
('A000120', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.259842519685
('A000140', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.27027027027
('A000150', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.222222222222
('A000210', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.366666666667
('A000230', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.571428571429
('A000240', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.40350877193
('A000270', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.264150943396
('A000640', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.433333333333
('A000660', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.333333333333
('A000670', datetime.datetime(2017, 2, 13, 0, 0)) 99 0.467391304348
('A

NameError: name 'OperationalError' is not defined