In [None]:
import os
import numpy as np
import pandas as pd
import tushare as ts
import matplotlib.pyplot as plt


# Column labels assigned to a per-stock frame after it has been joined with the
# calendar and its index has been turned back into a column
# (see transform_single_csv). NOTE: 'Volumn' is the literal runtime label.
column_names = ['Date','Open','High','Low','Close','Volumn','Value','Rate','Mark']
# Relative path of one sample stock CSV.
file_name = "Desktop/sh600108_D_ExDiv.csv"

# get data

# preprocess
def transform_calendar_csv(calendar_path="/Volumes/Expansion Drive/calendar/dates_master.csv"):
    """Load the master calendar CSV, indexed by its first column.

    Args:
        calendar_path: Path (or file-like object) of the headerless calendar
            CSV. Defaults to the location that used to be hard-coded, so
            existing zero-argument calls behave exactly as before.

    Returns:
        A DataFrame whose index is the first CSV column. Because the file is
        read with ``header=None`` the remaining columns keep pandas' default
        integer labels.
    """
    return pd.read_csv(calendar_path, header=None, index_col=0)

def transform_single_csv(df_calendar, file_name):
    """Read one headerless stock CSV and keep only rows present in the calendar.

    Args:
        df_calendar: Calendar frame indexed the same way as the stock file's
            first column.
        file_name: Path of the stock CSV to read.

    Returns:
        A DataFrame with a fresh 0..n-1 index whose columns are relabelled
        with the module-level ``column_names``.
    """
    prices = pd.read_csv(file_name, header=None, index_col=0)
    # Inner join on the index: rows missing from either side are dropped.
    joined = pd.concat([prices, df_calendar], axis=1, join='inner')
    flattened = joined.reset_index(drop=False)
    flattened.columns = column_names
    return flattened

def calculate_moving_average(df, periods, price_choice="Close"):
    """Compute two rate-adjusted simple moving averages of ``price_choice``.

    For every row position ``i`` starting at ``periods + 1``:

    * ``second_<periods>_averages`` is the mean of the ``periods`` prices at
      positions ``i - periods`` .. ``i - 1``;
    * ``first_<periods>_averages`` is the same window shifted one row
      earlier (positions ``i - periods - 1`` .. ``i - 2``).

    Each price at position ``k`` is scaled by ``Rate[k] / Rate[i - 1]`` before
    averaging (presumably an ex-dividend adjustment factor -- confirm against
    the data source).

    Args:
        df: Frame containing a ``"Rate"`` column and the ``price_choice``
            column. Rows are addressed by position.
        periods: Window length in rows.
        price_choice: Name of the price column to average.

    Returns:
        A copy of ``df`` from row ``periods + 1`` onward with the two average
        columns added. The input frame is not modified.
    """
    # Pull the columns out once; positional numpy access avoids a pandas
    # label lookup for every single element.
    rates = df["Rate"].values
    prices = df[price_choice].values

    first_values = []
    second_values = []
    for i in range(periods + 1, len(df)):
        base_rate = rates[i - 1]
        first_value = 0
        second_value = 0
        for j in range(periods):
            first_value += prices[i - j - 2] * (rates[i - j - 2] / base_rate)
            second_value += prices[i - j - 1] * (rates[i - j - 1] / base_rate)
        first_values.append(first_value / periods)
        second_values.append(second_value / periods)

    # Copy the slice so the new columns are written to an independent frame
    # rather than to a slice of the caller's frame.
    result = df[periods + 1:].copy()
    result["first_{}_averages".format(periods)] = np.asarray(first_values, dtype=float)
    result["second_{}_averages".format(periods)] = np.asarray(second_values, dtype=float)
    return result

def combine_moving_averages(df, short_periods, long_periods, price_choice="Close"):
    """Put the short- and long-window moving averages side by side.

    Args:
        df: Frame accepted by ``calculate_moving_average``.
        short_periods: Window length of the short average.
        long_periods: Window length of the long average.
        price_choice: Name of the price column to average.

    Returns:
        A new frame holding the short-window result, trimmed by
        ``long_periods - short_periods`` leading rows, plus the
        ``first_<long>_averages`` / ``second_<long>_averages`` columns taken
        from the long-window result (aligned on the index).
    """
    short_df = calculate_moving_average(df, short_periods, price_choice)
    long_df = calculate_moving_average(df, long_periods, price_choice)

    # Drop the extra leading rows of the short-window frame, and copy the
    # slice so the column assignments below do not target a slice of
    # another frame (which triggers pandas' SettingWithCopyWarning).
    combined = short_df[long_periods - short_periods:].copy()
    for prefix in ("first", "second"):
        column = "{}_{}_averages".format(prefix, long_periods)
        combined[column] = long_df[column]
    return combined

def find_average_matches(df, short_periods, long_periods):
    """Mark the rows where the short average crosses the long average.

    The difference ``short - long`` is taken for the ``first_*`` columns and
    for the ``second_*`` columns. A row gets

    * ``1``  when the first difference is negative and the second is >= 0,
    * ``-1`` when the first difference is positive and the second is <= 0,
    * ``0``  otherwise.

    Rows are processed by position, so the frame's index labels do not need
    to start at ``long_periods + 1``.

    Args:
        df: Frame containing the four ``first_/second_<n>_averages`` columns
            for ``short_periods`` and ``long_periods``.
        short_periods: Window length used in the short-average column names.
        long_periods: Window length used in the long-average column names.

    Returns:
        The same ``df`` object, with a float ``"match_points"`` column added
        in place.
    """
    short_first_column = "first_{}_averages".format(short_periods)
    short_second_column = "second_{}_averages".format(short_periods)
    long_first_column = "first_{}_averages".format(long_periods)
    long_second_column = "second_{}_averages".format(long_periods)

    first_differences = df[short_first_column].values - df[long_first_column].values
    second_differences = df[short_second_column].values - df[long_second_column].values

    # The two conditions are mutually exclusive (first < 0 vs. first > 0),
    # so the nesting order of the np.where calls does not matter.
    crossed_up = (second_differences >= 0) & (first_differences < 0)
    crossed_down = (second_differences <= 0) & (first_differences > 0)
    df["match_points"] = np.where(crossed_up, 1.0, np.where(crossed_down, -1.0, 0.0))
    return df

def calculate_daily_earnings(df, long_periods, buy_column="Open", sell_column="Open"):
    """Walk the signal column and record a per-row earning rate.

    Rows are looked up by index label ``position + long_periods + 1``.
    A position is opened on a ``1`` signal while flat, held on ``0`` and
    closed on ``-1``:

    * opening row: ``(Close - buy price) / buy price`` of that row;
    * held row:    ``(Close - previous close) / previous close``;
    * closing row: ``(sell price - previous close) / previous close``;
    * any other row: ``0``.

    "Previous close" is the prior row's ``Close`` scaled by
    ``Rate[previous] / Rate[current]``.

    Args:
        df: Frame with ``match_points``, ``Close``, ``Rate`` and the buy/sell
            columns, indexed by labels starting at ``long_periods + 1``.
        long_periods: Offset used to turn a row position into an index label.
        buy_column: Column supplying the entry price.
        sell_column: Column supplying the exit price.

    Returns:
        The same ``df`` object with a ``"daily_earning_rates"`` column added.
    """
    def cell(column, label):
        # Label-based lookup, resolved lazily just like a direct df[col][label].
        return df[column][label]

    offset = long_periods + 1
    holding = False
    earnings = []
    for position in range(len(df)):
        label = position + offset
        signal = cell("match_points", label)
        if signal == 1 and not holding:
            earnings.append(
                (cell("Close", label) - cell(buy_column, label)) / cell(buy_column, label)
            )
            holding = True
        elif signal == -1 and holding:
            previous_close = cell("Close", label - 1) * (
                cell("Rate", label - 1) / cell("Rate", label)
            )
            earnings.append((cell(sell_column, label) - previous_close) / previous_close)
            holding = False
        elif signal == 0 and holding:
            previous_close = cell("Close", label - 1) * (
                cell("Rate", label - 1) / cell("Rate", label)
            )
            earnings.append((cell("Close", label) - previous_close) / previous_close)
        else:
            earnings.append(0)
    df["daily_earning_rates"] = np.array(earnings, dtype=float)
    return df

def combine_daily_earnings_to_calendar(df, df_calendar):
    """Expand a stock's rows to the full calendar, filling missing earnings with 0.

    Args:
        df: Frame with a ``"Date"`` column and a ``"daily_earning_rates"``
            column.
        df_calendar: Calendar frame indexed by the same kind of date values.

    Returns:
        A new frame containing the union of both date sets in ascending
        order, with the dates moved back into the first column and
        ``daily_earning_rates`` set to 0 on rows the stock did not have.
    """
    indexed = df.set_index('Date')
    combined = pd.concat([indexed, df_calendar], axis=1, join='outer')
    # Whether concat sorts the joined index depends on the pandas version
    # (see its ``sort`` parameter). Later steps accumulate row by row, so
    # make the chronological order explicit.
    combined = combined.sort_index()
    combined = combined.reset_index(drop=False)
    combined['daily_earning_rates'] = combined['daily_earning_rates'].fillna(0)
    return combined

def calculate_cumulate_earnings(df):
    """Add the running sum of ``daily_earning_rates`` to the frame.

    Args:
        df: Frame with a numeric ``"daily_earning_rates"`` column.

    Returns:
        The same ``df`` object with a float ``"cumulate_earning_rates"``
        column added; element ``k`` is the sum of the first ``k + 1`` daily
        rates (rates are added, not compounded).
    """
    daily_rates = np.asarray(df["daily_earning_rates"].values, dtype=float)
    # np.cumsum accumulates left to right, matching a manual running total,
    # and (unlike Series.cumsum) lets a NaN propagate to later rows.
    df["cumulate_earning_rates"] = np.cumsum(daily_rates)
    return df

def combine_multiple_cumulate_earnings():
    """Run the 5/20 moving-average strategy on every file in the tkline folder.

    Returns:
        The calendar CSV (read without an index column) with one extra
        column per stock file, named after the file and holding that stock's
        cumulative earning rate for each calendar row.
    """
    files_address = "/Volumes/Expansion Drive/tkline"
    df_combine = pd.read_csv("/Volumes/Expansion Drive/calendar/dates_master.csv", header=None)
    # The calendar is identical for every stock and none of the steps below
    # modify it, so it is loaded once instead of once per file.
    df_calendar = transform_calendar_csv()
    # NOTE(review): os.listdir returns every directory entry; anything that
    # is not a stock CSV will be fed to the pipeline as well.
    for file_name in os.listdir(files_address):
        df = transform_single_csv(df_calendar, files_address+"/"+file_name)
        df2 = combine_moving_averages(df, 5, 20, price_choice="Close")
        df3 = find_average_matches(df2, 5, 20)
        df4 = calculate_daily_earnings(df3, 20, buy_column="Open", sell_column="Open")
        df5 = combine_daily_earnings_to_calendar(df4, df_calendar)
        df6 = calculate_cumulate_earnings(df5)
        df_combine[file_name] = df6["cumulate_earning_rates"]
    return df_combine

def mean_multiple_earnings(df_combine, files_address="/Volumes/Expansion Drive/tkline"):
    """Average the per-stock cumulative earning columns row by row.

    Args:
        df_combine: Frame that has one column per file found in
            ``files_address`` (named after the file).
        files_address: Directory whose entry names identify the stock
            columns. Defaults to the previously hard-coded folder.

    Returns:
        The same ``df_combine`` object with a ``"mean"`` column added. If the
        directory is empty the column is filled with NaN.

    Raises:
        KeyError: If a directory entry has no matching column.
    """
    file_names = os.listdir(files_address)
    # Add whole columns in directory order; per row this performs the same
    # sequence of additions as summing the files one by one.
    totals = np.zeros(len(df_combine))
    for name in file_names:
        totals = totals + df_combine[name].values
    # Divide by the number of columns actually summed. Deriving the divisor
    # from the frame's width miscounts once "mean" (or any other extra
    # column) is present, e.g. when this function is called a second time.
    if file_names:
        df_combine["mean"] = totals / len(file_names)
    else:
        df_combine["mean"] = np.full(len(df_combine), np.nan)
    return df_combine

# main
if __name__ == "__main__":
    # Run the 5/20 moving-average pipeline over every stock file, then plot
    # the row-wise mean of the per-stock cumulative earning rates.
    df_combine = combine_multiple_cumulate_earnings()
    df_mean = mean_multiple_earnings(df_combine)
    #print(df_mean["mean"][-5:])
    df_mean["mean"].plot()
    #df_combine["sh600000_D_ExDiv.csv"].plot()
    #df_combine["sh600108_D_ExDiv.csv"].plot()
    plt.show()
    
    
    # The string literal below is never executed; it preserves an earlier
    # single-stock version of this driver. NOTE(review): its calls no longer
    # match the signatures defined in this cell.
    '''
    df_calendar= transform_calendar_csv()
    df = transform_single_csv(df_calendar)
    df2 = combine_moving_averages(df, 5, 20, price_choice="Close")
    df3 = find_average_matches(df2, 5, 20)
    df4 = calculate_earnings(df3, 20, buy_column="Open", sell_column="Open")
    df5 = combine_daily_earnings_to_calendar(df4, df_calendar)
    df6 = calculate_cumulate_earnings(df5)
    
    print(df6["cumulate_earning_rates"][170:300])
    
    df6["cumulate_earning_rates"].plot()
    df6["Close"].plot()
    plt.show()
    '''
    
    

In [None]:
# SH600108 Moving Average Earning
import numpy as np
import pandas as pd
import tushare as ts
import matplotlib.pyplot as plt

# Column labels applied to the headerless single-stock CSV
# (see transform_single_csv). NOTE: 'Volumn' is the literal runtime label.
column_names = ['Date','Open','High','Low','Close','Volumn','Value','Rate']
# Relative path of the stock CSV that transform_single_csv reads.
file_name = "Desktop/sh600108_D_ExDiv.csv"

def transform_single_csv():
    """Read the module-level ``file_name`` CSV and label its columns.

    Returns:
        A DataFrame read without a header row, whose columns are relabelled
        with the module-level ``column_names``.
    """
    frame = pd.read_csv(file_name, header=None)
    frame.columns = column_names
    return frame

def calculate_moving_average(df, periods, price_choice="Close"):
    """Compute two rate-adjusted simple moving averages of ``price_choice``.

    For every row position ``i`` starting at ``periods + 1``:

    * ``second_<periods>_averages`` is the mean of the ``periods`` prices at
      positions ``i - periods`` .. ``i - 1``;
    * ``first_<periods>_averages`` is the same window shifted one row
      earlier (positions ``i - periods - 1`` .. ``i - 2``).

    Each price at position ``k`` is scaled by ``Rate[k] / Rate[i - 1]`` before
    averaging (presumably an ex-dividend adjustment factor -- confirm against
    the data source).

    Args:
        df: Frame containing a ``"Rate"`` column and the ``price_choice``
            column. Rows are addressed by position.
        periods: Window length in rows.
        price_choice: Name of the price column to average.

    Returns:
        A copy of ``df`` from row ``periods + 1`` onward with the two average
        columns added. The input frame is not modified.
    """
    # Pull the columns out once; positional numpy access avoids a pandas
    # label lookup for every single element.
    rates = df["Rate"].values
    prices = df[price_choice].values

    first_values = []
    second_values = []
    for i in range(periods + 1, len(df)):
        base_rate = rates[i - 1]
        first_value = 0
        second_value = 0
        for j in range(periods):
            first_value += prices[i - j - 2] * (rates[i - j - 2] / base_rate)
            second_value += prices[i - j - 1] * (rates[i - j - 1] / base_rate)
        first_values.append(first_value / periods)
        second_values.append(second_value / periods)

    # Copy the slice so the new columns are written to an independent frame
    # rather than to a slice of the caller's frame.
    result = df[periods + 1:].copy()
    result["first_{}_averages".format(periods)] = np.asarray(first_values, dtype=float)
    result["second_{}_averages".format(periods)] = np.asarray(second_values, dtype=float)
    return result

def combine_moving_averages(df, short_periods, long_periods, price_choice="Close"):
    """Put the short- and long-window moving averages side by side.

    Args:
        df: Frame accepted by ``calculate_moving_average``.
        short_periods: Window length of the short average.
        long_periods: Window length of the long average.
        price_choice: Name of the price column to average.

    Returns:
        A new frame holding the short-window result, trimmed by
        ``long_periods - short_periods`` leading rows, plus the
        ``first_<long>_averages`` / ``second_<long>_averages`` columns taken
        from the long-window result (aligned on the index).
    """
    short_df = calculate_moving_average(df, short_periods, price_choice)
    long_df = calculate_moving_average(df, long_periods, price_choice)

    # Drop the extra leading rows of the short-window frame, and copy the
    # slice so the column assignments below do not target a slice of
    # another frame (which triggers pandas' SettingWithCopyWarning).
    combined = short_df[long_periods - short_periods:].copy()
    for prefix in ("first", "second"):
        column = "{}_{}_averages".format(prefix, long_periods)
        combined[column] = long_df[column]
    return combined

def find_average_matches(df, short_periods, long_periods):
    """Mark the rows where the short average crosses the long average.

    The difference ``short - long`` is taken for the ``first_*`` columns and
    for the ``second_*`` columns. A row gets

    * ``1``  when the first difference is negative and the second is >= 0,
    * ``-1`` when the first difference is positive and the second is <= 0,
    * ``0``  otherwise.

    Rows are processed by position, so the frame's index labels do not need
    to start at ``long_periods + 1``.

    Args:
        df: Frame containing the four ``first_/second_<n>_averages`` columns
            for ``short_periods`` and ``long_periods``.
        short_periods: Window length used in the short-average column names.
        long_periods: Window length used in the long-average column names.

    Returns:
        The same ``df`` object, with a float ``"match_points"`` column added
        in place.
    """
    short_first_column = "first_{}_averages".format(short_periods)
    short_second_column = "second_{}_averages".format(short_periods)
    long_first_column = "first_{}_averages".format(long_periods)
    long_second_column = "second_{}_averages".format(long_periods)

    first_differences = df[short_first_column].values - df[long_first_column].values
    second_differences = df[short_second_column].values - df[long_second_column].values

    # The two conditions are mutually exclusive (first < 0 vs. first > 0),
    # so the nesting order of the np.where calls does not matter.
    crossed_up = (second_differences >= 0) & (first_differences < 0)
    crossed_down = (second_differences <= 0) & (first_differences > 0)
    df["match_points"] = np.where(crossed_up, 1.0, np.where(crossed_down, -1.0, 0.0))
    return df

def calculate_earnings(df, long_periods, buy_column="Open", sell_column="Open"):
    """Walk the signal column and record daily and cumulative earning rates.

    Rows are looked up by index label ``i + long_periods + 1``. A position is
    opened on a ``1`` signal while flat, held on ``0`` and closed on ``-1``:

    * opening row: ``(Close - buy price) / buy price`` of that row;
    * held row:    ``(Close - previous close) / previous close``;
    * closing row: ``(sell price - previous close) / previous close``;
    * any other row: ``0``.

    "Previous close" is the prior row's ``Close`` scaled by
    ``Rate[previous] / Rate[current]``. The cumulative rate is the running
    sum of the daily rates and starts from 0, so a signal on the very first
    row is handled like any other row.

    Args:
        df: Frame with ``match_points``, ``Close``, ``Rate`` and the buy/sell
            columns, indexed by labels starting at ``long_periods + 1``.
        long_periods: Offset used to turn a row position into an index label.
        buy_column: Column supplying the entry price.
        sell_column: Column supplying the exit price.

    Returns:
        The same ``df`` object with ``"daily_earning_rates"`` and
        ``"cumulate_earning_rates"`` columns added.
    """
    buy_in = False
    # Running total kept as a scalar: looking at the last element of the
    # output list fails when that list is still empty (first-row signal).
    running_total = 0.0
    daily_earning_rates = []
    cumulate_earning_rates = []
    for i in range(len(df)):
        real_index = i + long_periods + 1
        match_point = df["match_points"][real_index]
        if match_point == 1 and not buy_in:
            buy_price = df[buy_column][real_index]
            daily_earning_rate = (df["Close"][real_index] - buy_price) / buy_price
            buy_in = True
        elif buy_in and (match_point == -1 or match_point == 0):
            last_rate = df["Rate"][real_index-1] / df["Rate"][real_index]
            adjust_last_close = df["Close"][real_index-1] * last_rate
            if match_point == -1:
                # Sell signal: exit at the sell price and go flat.
                daily_earning_rate = (df[sell_column][real_index] - adjust_last_close) / adjust_last_close
                buy_in = False
            else:
                # No signal while holding: mark to the day's close.
                daily_earning_rate = (df["Close"][real_index] - adjust_last_close) / adjust_last_close
        else:
            daily_earning_rate = 0.0
        running_total = running_total + daily_earning_rate
        daily_earning_rates.append(daily_earning_rate)
        cumulate_earning_rates.append(running_total)
    df["daily_earning_rates"] = np.array(daily_earning_rates, dtype=float)
    df["cumulate_earning_rates"] = np.array(cumulate_earning_rates, dtype=float)
    return df

if __name__ == "__main__":
    # Single-stock run: load the CSV, compute the 5- and 20-row averages,
    # mark the crossings, derive the earning rates, then print the last five
    # rows and plot the cumulative earning rate.
    df = transform_single_csv()
    #df1 = calculate_moving_average(df, 5, price_choice="Close")
    df2 = combine_moving_averages(df, 5, 20, price_choice="Close")
    df3 = find_average_matches(df2, 5, 20)
    df4 = calculate_earnings(df3, 20, buy_column="Open", sell_column="Open")
    print(df4[-5:])
    df4["cumulate_earning_rates"].plot()
    plt.show()
    

In [None]:
# File System and Daily Combine:

import tarfile
import numpy as np

def write_to_single_csv(date, root="/Volumes/Expansion Drive"):
    """Append one day's k-line record to each stock's file under ``tkline``.

    The day's k-line file and factor file are read in lockstep, line by
    line. When the first eight characters of both lines agree, a record
    ``<date><k-line after its first 8 chars>,0,<last factor field>`` is
    appended to ``<root>/tkline/<stock>_D_ExDiv.csv``, where ``<stock>`` is
    the lower-cased first field of the factor line.

    Args:
        date: Trading day as an integer such as ``20160804``. Days up to
            20160816 are read from ``<root>/kline``; later days from
            ``<root>/dailydata/<date>/``.
        root: Base directory containing the ``kline``, ``dailydata`` and
            ``tkline`` folders. Defaults to the previously hard-coded volume.

    Returns:
        None. If either input file cannot be opened, ``"Does not Exist"`` is
        printed and nothing is written.
    """
    if date <= 20160816:
        k_line_file_address = root + "/kline/D" + str(date) + ".txt"
        factor_file_address = root + "/kline/F" + str(date) + ".csv"
    else:
        k_line_file_address = root + "/dailydata/" + str(date) + "/kline/D" + str(date) + ".txt"
        factor_file_address = root + "/dailydata/" + str(date) + "/factor/F" + str(date) + ".csv"

    try:
        k_line_file = open(k_line_file_address, 'r')
    except (IOError, OSError):
        print("Does not Exist")
        return
    with k_line_file:
        try:
            factor_file = open(factor_file_address, 'r')
        except (IOError, OSError):
            print("Does not Exist")
            return
        with factor_file:
            # zip stops as soon as either file runs out of lines.
            for k_line, factor_line in zip(k_line_file, factor_file):
                fields = factor_line.split(",")
                stock_name = fields[0]
                # Skip blank factor lines and lines whose stock code does not
                # match the k-line on the same row.
                if not stock_name.strip() or k_line[0:8] != factor_line[0:8]:
                    continue
                # The last factor line may lack a trailing newline; normalise
                # so every appended record ends with exactly one.
                factor = fields[-1].rstrip("\n")
                stock_file_address = root + "/tkline/" + stock_name.lower() + "_D_ExDiv.csv"
                final_k_line = str(date) + k_line.strip('\n')[8:] + ",0," + factor + "\n"
                try:
                    with open(stock_file_address, 'a') as stock_file:
                        stock_file.write(final_k_line)
                except (IOError, OSError):
                    # Best effort: a stock file that cannot be written is skipped.
                    continue
    
def write_from_kline_to_sh600000_test(url="/Volumes/Expansion Drive/tkline/sh600000_D_ExDiv.csv"):
    """Append one fixed sample record to a stock file.

    Args:
        url: Path of the file to append to. Defaults to the previously
            hard-coded sh600000 file.
    """
    string = "20160802,15.7900,15.8500,15.6800,15.7500,10877800,0,9.334\n"
    # The context manager closes (and therefore flushes) the file, which the
    # bare open()/write() pair never did.
    with open(url, 'a') as stock_file:
        stock_file.write(string)
        
def write_from_kline_to_tkline():
    """Append the k-line records of a fixed list of days to the stock files.

    Returns:
        The string ``"Finish Kline to Tkline"`` once every day was processed.
    """
    trading_days = np.array([
        20160804, 20160805, 20160808, 20160809, 20160810,
        20160811, 20160812, 20160815, 20160816,
    ])
    for trading_day in trading_days:
        write_to_single_csv(trading_day)
    return "Finish Kline to Tkline"
    
def write_from_daily_to_tkline():
    """Unpack and merge the daily archives of a fixed list of days.

    Each day is handed to ``write_from_single_daily_to_tkline``; after every
    day the number of days processed so far is printed.

    Returns:
        The string ``"Finish Daily to Tkline"`` once every day was processed.
    """
    trading_days = np.array([
        20160817, 20160818, 20160819, 20160822, 20160823, 20160824, 20160825, 20160826, 20160829, 20160830,
        20160831, 20160901, 20160902, 20160905, 20160906, 20160907, 20160908, 20160909, 20160912, 20160913,
        20160914, 20160919, 20160920, 20160921, 20160922, 20160923, 20160926, 20160927, 20160928, 20160929,
        20160930, 20161010, 20161011, 20161012, 20161013, 20161014, 20161017, 20161018, 20161019, 20161020,
        20161021, 20161024, 20161025, 20161026, 20161027, 20161028, 20161031, 20161101, 20161102, 20161103,
        20161104, 20161107, 20161108, 20161109, 20161110, 20161111, 20161114, 20161115, 20161116, 20161117,
        20161118, 20161121, 20161122, 20161123, 20161124, 20161125, 20161128, 20161129, 20161130, 20161201,
        20161202, 20161205, 20161206, 20161207, 20161208, 20161209, 20161212, 20161213, 20161214, 20161215,
        20161216, 20161219, 20161220, 20161221, 20161222, 20161223, 20161226, 20161227, 20161228, 20161229,
        20161230, 20170103, 20170104, 20170105, 20170106, 20170109, 20170110, 20170111, 20170112, 20170113,
        20170116, 20170117, 20170118, 20170119, 20170120, 20170123, 20170124, 20170125, 20170126, 20170203,
        20170206, 20170207, 20170208, 20170209, 20170210, 20170213, 20170214, 20170215, 20170216, 20170217,
    ])
    # Progress output: 1, 2, 3, ... one line per finished day.
    for done, trading_day in enumerate(trading_days, 1):
        write_from_single_daily_to_tkline(trading_day)
        print(done)
    return "Finish Daily to Tkline"

def write_from_single_daily_to_tkline(date):
    """Extract one day's tar archive and merge its k-lines into the stock files.

    Args:
        date: Trading day as an integer such as ``20160817``; the archive
            ``<date>_01.tar`` is read from the dailydata folder.
    """
    file_address = "/Volumes/Expansion Drive/dailydata/"+str(date)+"_01.tar"
    # The context manager closes the archive even if extraction raises.
    # NOTE(review): extractall trusts the member paths stored in the archive;
    # only use it on archives from a trusted source.
    with tarfile.open(file_address) as tar:
        tar.extractall("/Volumes/Expansion Drive/dailydata/")
    write_to_single_csv(date)
    
if __name__ == "__main__":
    # Operator reminders only; nothing is written when this cell is run.
    print(
        "Have to run to add single daily file to tkline files everyday.",
        "20160820 is the one to input but have not done yet.",
        sep="\n",
    )
    
        

In [2]:
import datetime
# Print the current local date as two-digit year/month/day.
print(datetime.datetime.now().strftime("%y/%m/%d"))

03/03/17
