In [5]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import gc
import time
import datetime

In [6]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 1 : neg_count, neu_count, pos_count + end_price, trend
def read_data1(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, usecols = ['section','neg_count','neu_count','pos_count','open_price','trend'])
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"neg_count":"neg_count(n)", 
                        "neu_count":"neu_count(n)", 
                        "pos_count":"pos_count(n)", 
                        "open_price":"open_price(n)",
                        "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    df['end_price(n)'] = df['open_price(n)'].shift(-1)
    if n >= 2:
        for i in range(1,n):
            df[f'neg_count(n-{i})'] = df['neg_count(n)'].shift(i)
            df[f'neu_count(n-{i})'] = df['neu_count(n)'].shift(i)
            df[f'pos_count(n-{i})'] = df['pos_count(n)'].shift(i)
            df[f'trend(n-{i})'] = df['trend(n)'].shift(i)
            df[f'end_price(n-{i})'] = df['end_price(n)'].shift(i)

    df = df.drop(columns=['open_price(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [7]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 2 : com_ave, tweet_count + end_price, trend
def read_data2(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, usecols = ['section','com_ave','tweet_count','open_price','trend'])
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"com_ave":"com_ave(n)", 
                            "tweet_count":"tweet_count(n)", 
                            "open_price":"open_price(n)",
                            "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    df['end_price(n)'] = df['open_price(n)'].shift(-1)
    if n >= 2:
        for i in range(1,n):
            df[f'com_ave(n-{i})'] = df['com_ave(n)'].shift(i)
            df[f'tweet_count(n-{i})'] = df['tweet_count(n)'].shift(i)
            df[f'trend(n-{i})'] = df['trend(n)'].shift(i)
            df[f'end_price(n-{i})'] = df['end_price(n)'].shift(i)

    df = df.drop(columns=['open_price(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [8]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 3 : neg_count, neu_count, pos_count + com_ave, tweet_count + end_price, trend
def read_data3(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file)
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"neg_count":"neg_count(n)", 
                            "neu_count":"neu_count(n)", 
                            "pos_count":"pos_count(n)",
                            "com_ave":"com_ave(n)", 
                            "tweet_count":"tweet_count(n)", 
                            "open_price":"open_price(n)",
                            "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    df['end_price(n)'] = df['open_price(n)'].shift(-1)
    if n >= 2:
        for i in range(1,n):
            df[f'neg_count(n-{i})'] = df['neg_count(n)'].shift(i)
            df[f'neu_count(n-{i})'] = df['neu_count(n)'].shift(i)
            df[f'pos_count(n-{i})'] = df['pos_count(n)'].shift(i)
            df[f'com_ave(n-{i})'] = df['com_ave(n)'].shift(i)
            df[f'tweet_count(n-{i})'] = df['tweet_count(n)'].shift(i)
            df[f'trend(n-{i})'] = df['trend(n)'].shift(i)
            df[f'end_price(n-{i})'] = df['end_price(n)'].shift(i)
    
    df = df.drop(columns=['open_price(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [9]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 4 : neg_count, neu_count, pos_count
def read_data4(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, usecols = ['section','neg_count','neu_count','pos_count','trend'])
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"neg_count":"neg_count(n)", 
                            "neu_count":"neu_count(n)", 
                            "pos_count":"pos_count(n)",
                            "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    if n >= 2:
        for i in range(1,n):
            df[f'neg_count(n-{i})'] = df['neg_count(n)'].shift(i)
            df[f'neu_count(n-{i})'] = df['neu_count(n)'].shift(i)
            df[f'pos_count(n-{i})'] = df['pos_count(n)'].shift(i)
    
    df = df.drop(columns=['trend(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [10]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 5 : com_ave, tweet_count
def read_data5(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, usecols = ['section','com_ave','tweet_count','trend'])
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"com_ave":"com_ave(n)", 
                            "tweet_count":"tweet_count(n)", 
                            "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    if n >= 2:
        for i in range(1,n):
            df[f'com_ave(n-{i})'] = df['com_ave(n)'].shift(i)
            df[f'tweet_count(n-{i})'] = df['tweet_count(n)'].shift(i)
    
    df = df.drop(columns=['trend(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [11]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 6 : neg_count, neu_count, pos_count + com_ave, tweet_count
def read_data6(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, usecols = ['section','neg_count','neu_count','pos_count','com_ave','tweet_count','trend'])
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"neg_count":"neg_count(n)", 
                            "neu_count":"neu_count(n)", 
                            "pos_count":"pos_count(n)",
                            "com_ave":"com_ave(n)", 
                            "tweet_count":"tweet_count(n)", 
                            "open_price":"open_price(n)",
                            "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    if n >= 2:
        for i in range(1,n):
            df[f'neg_count(n-{i})'] = df['neg_count(n)'].shift(i)
            df[f'neu_count(n-{i})'] = df['neu_count(n)'].shift(i)
            df[f'pos_count(n-{i})'] = df['pos_count(n)'].shift(i)
            df[f'com_ave(n-{i})'] = df['com_ave(n)'].shift(i)
            df[f'tweet_count(n-{i})'] = df['tweet_count(n)'].shift(i)
    
    df = df.drop(columns=['trend(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [12]:
# データ読み込み、各月の結合、欠損値を含む行の削除
# 7 : end_price, trend
def read_data7(timespan,n):
    print("[read開始]\n")
    
    files = [f'tweet-svm/{timespan}/2021-01.csv',
             f'tweet-svm/{timespan}/2021-02.csv',
             f'tweet-svm/{timespan}/2021-03.csv',
             f'tweet-svm/{timespan}/2021-04.csv',
             f'tweet-svm/{timespan}/2021-05.csv',
             f'tweet-svm/{timespan}/2021-06.csv',
             f'tweet-svm/{timespan}/2021-07.csv']
    datas = []
    df = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, usecols = ['section','open_price','trend'])
        datas.append(data)

    # ファイルの結合
    df = pd.concat(datas).reset_index(drop=True)
    df = df.drop(columns=['section'])
    
    # カラム名変更(区間No.割り当て)
    df = df.rename(columns={"open_price":"open_price(n)",
                            "trend":"trend(n)"})
    
    print('欠損値削除前の総データ数：{}'.format(len(df)))
    
    # 目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)

    # 説明変数
    df['end_price(n)'] = df['open_price(n)'].shift(-1)
    if n >= 2:
        for i in range(1,n):
            df[f'trend(n-{i})'] = df['trend(n)'].shift(i)
            df[f'end_price(n-{i})'] = df['end_price(n)'].shift(i)
    
    df = df.drop(columns=['open_price(n)'])

    # 欠損値を含む行を削除
    df = df.dropna(how='any')
    print('欠損値削除後の総データ数：{}\n'.format(len(df)))
    
    print("[read終了]")
    
    return df

In [13]:
# スケーリング x' = (x - xmin)/(xmax - xmin)
def scaling(x_train,x_test):
    print("[scaling開始]")
    
    scaler = MinMaxScaler().fit(x_train)

    x_train_scaled = scaler.transform(x_train)
    x_test_scaled  = scaler.transform(x_test)
    
    print("[scalig終了]")
    
    return x_train_scaled, x_test_scaled

In [14]:
# 学習 One-versus-the-rest
# Default ver. C=1, gamma=auto
def default_svm(x_train, y_train, x_test):
    print("[学習開始(Default)]")
    
    model = OneVsRestClassifier(SVC())
    model.fit(x_train, y_train)
    pred_y = model.predict(x_test)
    
    print("[学習終了(Default)]\n")
    
    return pred_y

In [None]:
# C,gammmaの最適値探索ver.
'''
def best_svm(x_train, y_train, x_test):
    model = OneVsRestClassifier(SVC())
    C_params = np.logspace(-4, 4, 5) #10^(-4) ~ 10^4 までで均等に5つの値
    gamma_params = np.logspace(-4, 4, 5)

    parameters = {'estimator__C': C_params,
                  'estimator__gamma': gamma_params}

    model_tuning = GridSearchCV(estimator = model,
                                param_grid = parameters,
                                n_jobs = -1,
                                verbose = 3
    )
    model_tuning.fit(x_train, y_train)
    pred_y2 = model_tuning.predict(test_x)

    # Best parameter
    model_tuning.best_params_
    
    return pred_y2
    
'''

In [15]:
# main
# all実行 (a=1~7, n=1~10, timespan×7の490通り)
alist = {1:'neg_count, neu_count, pos_count + end_price, trend',
         2:'com_ave, tweet_count + end_price, trend',
         3:'neg_count, neu_count, pos_count + com_ave, tweet_count + end_price, trend',
         4:'neg_count, neu_count, pos_count',
         5:'com_ave, tweet_count',
         6:'neg_count, neu_count, pos_count + com_ave, tweet_count',
         7:'end_price, trend'}
s = ('学習に用いる感情データの種類 a\n' 
     '1 : neg_count, neu_count, pos_count + end_price, trend\n'
     '2 : com_ave, tweet_count + end_price, trend\n'
     '3 : neg_count, neu_count, pos_count + com_ave, tweet_count + end_price, trend\n'
     '4 : neg_count, neu_count, pos_count\n'
     '5 : com_ave, tweet_count\n'
     '6 : neg_count, neu_count, pos_count + com_ave, tweet_count\n'
     '7 : end_price, trend')
print(s) 
print("学習に用いるデータの範囲 n\n(例1) n = 1 → 区間[n]\n(例2) n = 2 → 区間[n-1, n]")

alist_ = [1,2,3,4,5,6,7]
nlist = [1,2,3,4,5,6,7,8,9,10]
tlist = ['1d','12h','4h','1h','30m','15m','5m']
aculist = {}

for a in alist_:
    for n in nlist:
        for timespan in tlist:
            print("********************{}：開始********************\n".format(timespan))
            dt_start = datetime.datetime.now()
            print(f'開始時刻：{dt_start}\n')

            print(f'(a,n) = ({a},{n})')

            result = []

            #データの読み込み
            if a == 1:
                data = read_data1(timespan,n)
            elif a == 2:
                data = read_data2(timespan,n)
            elif a == 3:
                data = read_data3(timespan,n)
            elif a == 4:
                data = read_data4(timespan,n)
            elif a == 5:
                data = read_data5(timespan,n)
            elif a == 6:
                data = read_data6(timespan,n)
            elif a == 7:
                data = read_data7(timespan,n)
                
            # 説明変数、目的変数をセット
            x = data.iloc[:, data.columns!='trend(n+1)']
            y = data.loc[:, 'trend(n+1)']

            #データを訓練用とテスト用に分割
            #7か月のうち、7月をテストに、1~6月を訓練に分割
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 1/7, shuffle=False)

            # スケーリング
            x_train_scaled, x_test_scaled = scaling(x_train, x_test)

            # 学習
            y_pred = default_svm(x_train_scaled, y_train, x_test_scaled)

            # 結果表示
            print ('accuracy(Default) : {:.5f}\n'.format(accuracy_score(y_test, y_pred)))
            #print ('最適地探索: {:.5f}'.format(accuracy_score(test_y, pred_y2)))

            # 混同行列
            print(confusion_matrix(y_test, y_pred))

            # precision, recall, f1-score, support
            print(classification_report(y_test, y_pred, target_names = ['neg','neu','pos']))

            dt_end = datetime.datetime.now()
            print(f'終了時刻：{dt_end}\n')

            # 結果を格納
            aculist[f'{timespan}'] = accuracy_score(y_test, y_pred)
            df_log = pd.read_csv('tweet-svm/svm2_log.csv')
            s = pd.Series([dt_start,
                           dt_end,
                           timespan,
                           alist[a],
                           n,
                           len(x.columns.values),
                           len(data),
                           len(y_train),
                           len(y_test),
                           accuracy_score(y_test, y_pred),
                           confusion_matrix(y_test, y_pred),
                           classification_report(y_test, y_pred,target_names=['neg','neu','pos'], output_dict=True)],
                    index=['start_time',
                           'end_time',
                           'timespan',
                           'kind_of_data',
                           'range_of_data',
                           'attribute',
                           'all_data',
                           'training_data',
                           'test_data',
                           'accuracy',
                           'confusion_matrix',
                           'score_report'])
            df_log = df_log.append(s, ignore_index=True)
            df_log.to_csv('tweet-svm/svm2_log.csv',index=False)

            # メモリ開放
            del data,x,y,x_train,x_test,y_train,y_test,x_train_scaled,x_test_scaled,y_pred,dt_start,dt_end,df_log
            gc.collect()

            print("********************{}：終了********************\n".format(timespan))
    
print(f'正答率\n{aculist}')

学習に用いる感情データの種類 a
1 : neg_count, neu_count, pos_count + end_price, trend
2 : com_ave, tweet_count + end_price, trend
3 : neg_count, neu_count, pos_count + com_ave, tweet_count + end_price, trend
4 : neg_count, neu_count, pos_count
5 : com_ave, tweet_count
6 : neg_count, neu_count, pos_count + com_ave, tweet_count
7 : end_price, trend
学習に用いるデータの範囲 n
(例1) n = 1 → 区間[n]
(例2) n = 2 → 区間[n-1, n]
********************1d：開始********************

開始時刻：2021-10-10 14:59:59.899405

(a,n) = (1,1)
[read開始]

欠損値削除前の総データ数：212
欠損値削除後の総データ数：211

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.38710

[[2 3 2]
 [3 4 6]
 [1 4 6]]
              precision    recall  f1-score   support

         neg       0.33      0.29      0.31         7
         neu       0.36      0.31      0.33        13
         pos       0.43      0.55      0.48        11

    accuracy                           0.39        31
   macro avg       0.38      0.38      0.37        31
weighted avg       0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


********************1d：終了********************

********************12h：開始********************

開始時刻：2021-10-10 20:30:08.054498

(a,n) = (3,10)
[read開始]

欠損値削除前の総データ数：424
欠損値削除後の総データ数：414

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.31667

[[ 3  4  8]
 [ 9  7 14]
 [ 3  3  9]]
              precision    recall  f1-score   support

         neg       0.20      0.20      0.20        15
         neu       0.50      0.23      0.32        30
         pos       0.29      0.60      0.39        15

    accuracy                           0.32        60
   macro avg       0.33      0.34      0.30        60
weighted avg       0.37      0.32      0.31        60

終了時刻：2021-10-10 20:30:08.117003

********************12h：終了********************

********************4h：開始********************

開始時刻：2021-10-10 20:30:08.163881

(a,n) = (3,10)
[read開始]

欠損値削除前の総データ数：1272
欠損値削除後の総データ数：1262

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Def

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


欠損値削除後の総データ数：418

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.50000

[[ 5  4  6]
 [ 4 19  7]
 [ 4  5  6]]
              precision    recall  f1-score   support

         neg       0.38      0.33      0.36        15
         neu       0.68      0.63      0.66        30
         pos       0.32      0.40      0.35        15

    accuracy                           0.50        60
   macro avg       0.46      0.46      0.46        60
weighted avg       0.51      0.50      0.51        60

終了時刻：2021-10-10 22:11:22.242977

********************12h：終了********************

********************4h：開始********************

開始時刻：2021-10-10 22:11:22.289866

(a,n) = (4,6)
[read開始]

欠損値削除前の総データ数：1272
欠損値削除後の総データ数：1266

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.49171

[[11 17 11]
 [11 61 24]
 [ 6 23 17]]
              precision    recall  f1-score   support

         neg       0.39      0.28      0.33        39
        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


欠損値削除前の総データ数：424
欠損値削除後の総データ数：419

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.45000

[[ 5  5  5]
 [ 7 19  4]
 [ 5  7  3]]
              precision    recall  f1-score   support

         neg       0.29      0.33      0.31        15
         neu       0.61      0.63      0.62        30
         pos       0.25      0.20      0.22        15

    accuracy                           0.45        60
   macro avg       0.39      0.39      0.39        60
weighted avg       0.44      0.45      0.45        60

終了時刻：2021-10-11 02:12:14.876980

********************12h：終了********************

********************4h：開始********************

開始時刻：2021-10-11 02:12:14.939480

(a,n) = (6,5)
[read開始]

欠損値削除前の総データ数：1272
欠損値削除後の総データ数：1267

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.49724

[[ 6 25  8]
 [11 69 16]
 [ 7 24 15]]
              precision    recall  f1-score   support

         neg       0.25      0.15      0.19  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


欠損値削除後の総データ数：418

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.35000

[[ 6  3  6]
 [ 8 11 11]
 [ 7  4  4]]
              precision    recall  f1-score   support

         neg       0.29      0.40      0.33        15
         neu       0.61      0.37      0.46        30
         pos       0.19      0.27      0.22        15

    accuracy                           0.35        60
   macro avg       0.36      0.34      0.34        60
weighted avg       0.42      0.35      0.37        60

終了時刻：2021-10-11 02:21:49.205102

********************12h：終了********************

********************4h：開始********************

開始時刻：2021-10-11 02:21:49.251979

(a,n) = (6,6)
[read開始]

欠損値削除前の総データ数：1272
欠損値削除後の総データ数：1266

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.49724

[[ 5 26  8]
 [ 5 73 18]
 [ 7 27 12]]
              precision    recall  f1-score   support

         neg       0.29      0.13      0.18        39
        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


欠損値削除後の総データ数：422

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.40984

[[ 9  1  6]
 [ 4  4 22]
 [ 1  2 12]]
              precision    recall  f1-score   support

         neg       0.64      0.56      0.60        16
         neu       0.57      0.13      0.22        30
         pos       0.30      0.80      0.44        15

    accuracy                           0.41        61
   macro avg       0.50      0.50      0.42        61
weighted avg       0.52      0.41      0.37        61

終了時刻：2021-10-11 03:35:04.203610

********************12h：終了********************

********************4h：開始********************

開始時刻：2021-10-11 03:35:04.250528

(a,n) = (7,2)
[read開始]

欠損値削除前の総データ数：1272
欠損値削除後の総データ数：1270

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.42857

[[34  0  6]
 [55  0 41]
 [ 2  0 44]]
              precision    recall  f1-score   support

         neg       0.37      0.85      0.52        40
        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.48690

[[158   0   5]
 [183   0 183]
 [  1   0 195]]
              precision    recall  f1-score   support

         neg       0.46      0.97      0.63       163
         neu       0.00      0.00      0.00       366
         pos       0.51      0.99      0.67       196

    accuracy                           0.49       725
   macro avg       0.32      0.65      0.43       725
weighted avg       0.24      0.49      0.32       725

終了時刻：2021-10-11 03:35:06.656736

********************1h：終了********************

********************30m：開始********************

開始時刻：2021-10-11 03:35:06.719248

(a,n) = (7,2)
[read開始]

欠損値削除前の総データ数：10176
欠損値削除後の総データ数：10152

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.47760

[[322   0  22]
 [380   0 356]
 [  0   0 371]]
              precision    recall  f1-score   support

         neg       0.46      0.94      0.62       344
         neu       0.00      0.00      0.00       736
         pos       0.50      1.00      0.66       371

    accuracy                           0.48      1451
   macro avg       0.32      0.65      0.43      1451
weighted avg       0.24      0.48      0.32      1451

終了時刻：2021-10-11 03:35:15.266133

********************30m：終了********************

********************15m：開始********************

開始時刻：2021-10-11 03:35:15.328658

(a,n) = (7,2)
[read開始]

欠損値削除前の総データ数：20352
欠損値削除後の総データ数：20307

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.49466

[[682   0  29]
 [644   0 793]
 [  0   0 753]]
              precision    recall  f1-score   support

         neg       0.51      0.96      0.67       711
         neu       0.00      0.00      0.00      1437
         pos       0.48      1.00      0.65       753

    accuracy                           0.49      2901
   macro avg       0.33      0.65      0.44      2901
weighted avg       0.25      0.49      0.33      2901

終了時刻：2021-10-11 03:35:49.797392

********************15m：終了********************

********************5m：開始********************

開始時刻：2021-10-11 03:35:49.859888

(a,n) = (7,2)
[read開始]

欠損値削除前の総データ数：61056
欠損値削除後の総データ数：60931

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.51671

[[2221    0    2]
 [2174   15 2031]
 [   0    0 2262]]
              precision    recall  f1-score   support

         neg       0.51      1.00      0.67      2223
         neu       1.00      0.00      0.01      4220
         pos       0.53      1.00      0.69      2262

    accuracy                           0.52      8705
   macro avg       0.68      0.67      0.46      8705
weighted avg       0.75      0.52      0.35      8705

終了時刻：2021-10-11 03:41:18.172387

********************5m：終了********************

********************1d：開始********************

開始時刻：2021-10-11 03:41:18.250512

(a,n) = (7,3)
[read開始]

欠損値削除前の総データ数：212
欠損値削除後の総データ数：209

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.26667

[[2 2 3]
 [4 0 8]
 [3 2 6]]
              precision    recall  f1-score   support

         neg       0.22      0.29      0.25         7
         neu       0.00      0.00      0.00        12
         pos  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.48259

[[657   0  54]
 [695   0 742]
 [ 10   0 743]]
              precision    recall  f1-score   support

         neg       0.48      0.92      0.63       711
         neu       0.00      0.00      0.00      1437
         pos       0.48      0.99      0.65       753

    accuracy                           0.48      2901
   macro avg       0.32      0.64      0.43      2901
weighted avg       0.24      0.48      0.32      2901

終了時刻：2021-10-11 03:42:11.266138

********************15m：終了********************

********************5m：開始********************

開始時刻：2021-10-11 03:42:11.328646

(a,n) = (7,3)
[read開始]

欠損値削除前の総データ数：61056
欠損値削除後の総データ数：60926

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.50678

[[2179    0   44]
 [2134    5 2080]
 [  35    0 2227]]
              precision    recall  f1-score   support

         neg       0.50      0.98      0.66      2223
         neu       1.00      0.00      0.00      4219
         pos       0.51      0.98      0.67      2262

    accuracy                           0.51      8704
   macro avg       0.67      0.66      0.45      8704
weighted avg       0.75      0.51      0.35      8704

終了時刻：2021-10-11 03:48:54.368251

********************5m：終了********************

********************1d：開始********************

開始時刻：2021-10-11 03:48:54.446379

(a,n) = (7,4)
[read開始]

欠損値削除前の総データ数：212
欠損値削除後の総データ数：208

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.30000

[[4 0 3]
 [2 2 8]
 [3 5 3]]
              precision    recall  f1-score   support

         neg       0.44      0.57      0.50         7
         neu       0.29      0.17      0.21        12
         pos  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[学習終了(Default)]

accuracy(Default) : 0.49684

[[2135    0   88]
 [2097   14 2107]
 [  87    0 2175]]
              precision    recall  f1-score   support

         neg       0.49      0.96      0.65      2223
         neu       1.00      0.00      0.01      4218
         pos       0.50      0.96      0.66      2262

    accuracy                           0.50      8703
   macro avg       0.66      0.64      0.44      8703
weighted avg       0.74      0.50      0.34      8703

終了時刻：2021-10-11 03:56:33.133882

********************5m：終了********************

********************1d：開始********************

開始時刻：2021-10-11 03:56:33.212012

(a,n) = (7,5)
[read開始]

欠損値削除前の総データ数：212
欠損値削除後の総データ数：207

[read終了]
[scaling開始]
[scalig終了]
[学習開始(Default)]
[学習終了(Default)]

accuracy(Default) : 0.26667

[[3 0 4]
 [3 0 9]
 [2 4 5]]
              precision    recall  f1-score   support

         neg       0.38      0.43      0.40         7
         neu       0.00      0.00      0.00        12
         pos  

In [4]:
#log リセット
#'''
df = pd.DataFrame(columns=['start_time',
                   'end_time',
                   'timespan',
                   'kind_of_data',
                   'range_of_data',
                   'attribute',
                   'all_data',
                   'training_data',
                   'test_data',
                   'accuracy',
                   'confusion_matrix',
                   'score_report'])
df.to_csv('tweet-svm/svm2_log.csv',index=False)
#'''