# Library

## to Install

In [1]:
!pip install pendulum
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
%cd ta-lib
!./configure --prefix=/usr
!make
!make install
!pip install Ta-Lib
!pip install ta

Collecting pendulum
  Downloading pendulum-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (384 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m384.9/384.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting tzdata>=2020.1 (from pendulum)
  Downloading tzdata-2023.4-py2.py3-none-any.whl (346 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.6/346.6 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting time-machine>=2.6.0 (from pendulum)
  Downloading time_machine-2.13.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34 kB)
Installing collected packages: tzdata, time-machine, pendulum
Successfully installed pendulum-3.0.0 time-machine-2.13.0 tzdata-2023.4
--2024-01-13 18:24:48--  http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
Resolving prdownloads.sourceforge.net (prdownloads.sourceforge.net)... 204.68.111.105
Connecting to prdownloads.sourcefo

## to Import

In [2]:
# Import the Libraries

# Data Manupulation
import numpy as np
import pandas as pd
import pendulum
import scipy.stats as stats
from datetime import datetime, timedelta

# Techinical Indicators
import talib
import ta

# Plotting graphs
import matplotlib.pyplot as plt
import seaborn as sns

# Standardization
from sklearn.preprocessing import MinMaxScaler

# Machine learning
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import xgboost as xgb
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV, KFold

# Evaluation
from sklearn.metrics import classification_report
from sklearn.metrics import make_scorer, roc_auc_score, confusion_matrix

# Data fetching
from pandas_datareader import data as pdr
import yfinance as yf
yf.pdr_override()

# Save model
import os
import joblib

import requests
import warnings

# Get Features Historical Data

In [32]:
# List of stock symbols
stock_symbols = ['TISCO.BK', 'ERW.BK', 'SPRC.BK',
                 'THB=X', 'CNY=X', 'GC=F', 'BZ=F', 'CL=F',
                 'BSET100.BK', 'TDEX.BK', 'TOP.BK', 'IRPC.BK', 'BCP.BK',
                 '^FINCIAL.BK', '^BANK.BK', '^SERVICE.BK', '^TOURISM.BK', '^RESOURC.BK', '^ENERG.BK']

# Mapping of stock symbols to categories
symbol_categories = {
    'BANKING': ['^BANK.BK'],
    'BRENT': ['BZ=F'],
    'CNY': ['CNY=X'],
    'CRUDE': ['CL=F'],
    'ENERGY': ['^ENERG.BK'],
    'FINCIAL': ['^FINCIAL.BK'],
    'GOLD': ['GC=F'],
    'RESOURCE': ['^RESOURC.BK'],
    'SERVICE': ['^SERVICE.BK'],
    'SET50': ['TDEX.BK'],
    'SET100': ['BSET100.BK'],
    'TOURISM': ['^TOURISM.BK'],
    'USD': ['THB=X'],
    'ERW': ['ERW.BK'],
    'TISCO': ['TISCO.BK'],
    'SPRC': ['SPRC.BK'],
    'IRPC': ['IRPC.BK'],
    'TOP': ['TOP.BK'],
    'BCP': ['BCP.BK'],
}

bkk_tz = 'Asia/Bangkok'
end_date = pendulum.now(bkk_tz).strftime('%Y-%m-%d')
start_date = (pendulum.now(bkk_tz) - pendulum.duration(days=59)).strftime('%Y-%m-%d')
interval = '15m'

# Define variables for each table
banking_data = []
brent_data = []
cny_data = []
crude_data = []
energy_data = []
fincial_data = []
gold_data = []
resource_data = []
service_data = []
set50_data = []
set100_data = []
tourism_data = []
usd_data = []
erw_data = []
tisco_data = []
sprc_data = []
irpc_data = []
top_data = []
bcp_data = []

for category, symbols in symbol_categories.items():
    print(f"\nProcessing data for category: {category}")

    for symbol in symbols:
        try:
            hist_data = yf.download(symbol, start=start_date, end=end_date, interval=interval)
            hist_data = hist_data.between_time('10:00', '16:30')
            locals()[f"{category.lower()}_data"].append(hist_data)
        except Exception as e:
            print(f"Failed to download data for '{symbol}': {e}")

# Convert each list to a DataFrame
bank = pd.concat(banking_data).reset_index()
brent = pd.concat(brent_data).reset_index()
cny = pd.concat(cny_data).reset_index()
crude = pd.concat(crude_data).reset_index()
energy = pd.concat(energy_data).reset_index()
fincial = pd.concat(fincial_data).reset_index()
gold = pd.concat(gold_data).reset_index()
resource = pd.concat(resource_data).reset_index()
service = pd.concat(service_data).reset_index()
set50 = pd.concat(set50_data).reset_index()
set100 = pd.concat(set100_data).reset_index()
tour = pd.concat(tourism_data).reset_index()
usd = pd.concat(usd_data).reset_index()
erw = pd.concat(erw_data).reset_index()
tisco = pd.concat(tisco_data).reset_index()
sprc = pd.concat(sprc_data).reset_index()
irpc = pd.concat(irpc_data).reset_index()
top = pd.concat(top_data).reset_index()
bcp = pd.concat(bcp_data).reset_index()


Processing data for category: BANKING
[*********************100%%**********************]  1 of 1 completed

Processing data for category: BRENT
[*********************100%%**********************]  1 of 1 completed

Processing data for category: CNY
[*********************100%%**********************]  1 of 1 completed

Processing data for category: CRUDE
[*********************100%%**********************]  1 of 1 completed

Processing data for category: ENERGY
[*********************100%%**********************]  1 of 1 completed

Processing data for category: FINCIAL
[*********************100%%**********************]  1 of 1 completed

Processing data for category: GOLD
[*********************100%%**********************]  1 of 1 completed

Processing data for category: RESOURCE
[*********************100%%**********************]  1 of 1 completed

Processing data for category: SERVICE
[*********************100%%**********************]  1 of 1 completed

Processing data for category: SET50
[

In [33]:
# Features (X)
tisco_close = tisco['Close']
erw_close = erw['Close']
sprc_close = sprc['Close']
tisco_vol = tisco['Volume']
erw_vol = erw['Volume']
sprc_vol = sprc['Volume']
usd = usd['Close']
cny = cny['Close']
set100 = set100['Close']
set50 = set50['Close']
gold = gold['Close']
fin = fincial['Close']
bank = bank['Close']
service = service['Close']
tour = tour['Close']
resource = resource['Close']
energy = energy['Close']
brent = brent['Close']
crude = crude['Close']
bcp = bcp['Close']
top = top['Close']
irpc = irpc['Close']

# SPRC Model

## Define Variables

In [34]:
## Set Y
sprc['Close_Shift'] = sprc['Close'].shift(-1)
sprc['Close_Shift'] = sprc['Close_Shift'].fillna(method='ffill')

## Set Y Condition
sprc['Signal'] = np.where(sprc['Close_Shift'] > sprc['Close'], 1, 0)

In [35]:
# Set Variables
# Set Y
sprc_signal = sprc['Signal']

# Reference date
ref_date = sprc['Datetime']

In [36]:
# Create DataFrames for each variable
sprc_df = pd.DataFrame({'Datetime': sprc['Datetime'], 'Signal': sprc['Signal'], 'sprc_close': sprc['Close'], 'sprc_vol': sprc['Volume']})
usd_df = pd.DataFrame({'Datetime': ref_date, 'usd_close': usd})
cny_df = pd.DataFrame({'Datetime': ref_date, 'cny_close': cny})
set100_df = pd.DataFrame({'Datetime': ref_date, 'set100_close': set100})
set50_df = pd.DataFrame({'Datetime': ref_date, 'set50_close': set50})
gold_df = pd.DataFrame({'Datetime': ref_date, 'gold_close': gold})
resource_df = pd.DataFrame({'Datetime': ref_date, 'resource_close': resource})
energy_df = pd.DataFrame({'Datetime': ref_date, 'energy_close': energy})
brent_df = pd.DataFrame({'Datetime': ref_date, 'brent_close': brent})
crude_df = pd.DataFrame({'Datetime': ref_date, 'crude_close': crude})
irpc_df = pd.DataFrame({'Datetime': ref_date, 'irpc_close': irpc})
top_df = pd.DataFrame({'Datetime': ref_date, 'top_close': top})
bcp_df = pd.DataFrame({'Datetime': ref_date, 'bcp_close': bcp})

fin_df = pd.DataFrame({'Datetime': ref_date, 'fincial_close': fin})
bank_df = pd.DataFrame({'Datetime': ref_date, 'bank_close': bank})

# Merge all DataFrames on the 'Datetime' column
sprc_merged = pd.merge(sprc_df, usd_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, cny_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, set100_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, set50_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, gold_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, resource_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, energy_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, brent_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, crude_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, irpc_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, top_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, bcp_df, on='Datetime', how='left')

sprc_merged = pd.merge(sprc_merged, fin_df, on='Datetime', how='left')
sprc_merged = pd.merge(sprc_merged, bank_df, on='Datetime', how='left')


# Display the merged DataFrame
print(sprc_merged)

               Datetime  Signal  sprc_close  sprc_vol  usd_close  cny_close  \
0   2023-11-16 10:00:00       1        8.05    319900  35.470001     7.2474   
1   2023-11-16 10:15:00       0        8.10   3218000  35.419998     7.2477   
2   2023-11-16 10:30:00       1        8.05    659504  35.410000     7.2476   
3   2023-11-16 10:45:00       0        8.10   1937500  35.450001     7.2476   
4   2023-11-16 11:00:00       0        8.10   1263200  35.450001     7.2476   
..                  ...     ...         ...       ...        ...        ...   
696 2024-01-12 15:15:00       1        8.30    666342  34.590000     7.1593   
697 2024-01-12 15:30:00       0        8.40    886500  34.564999     7.1593   
698 2024-01-12 15:45:00       0        8.35     83750  34.566002     7.1593   
699 2024-01-12 16:00:00       0        8.35    295500  34.570000     7.1593   
700 2024-01-12 16:15:00       0        8.35    947801  34.584999     7.1593   

     set100_close  set50_close   gold_close  resour

In [37]:
# Fill Null Values
sprc_merged.fillna(method = 'ffill', inplace = True)

## Adding Technical Features

In [38]:
# Create a copy of the DataFrame to avoid modifying the original data
sprc_data = sprc_merged.copy()

# Adding Technical Features
sprc_data['ema'] = ta.trend.ema_indicator(close=sprc_data['sprc_close'], window=14)
sprc_data['rsi'] = ta.momentum.RSIIndicator(close=sprc_data['sprc_close'], window=14).rsi()
sprc_data['obv'] = ta.volume.OnBalanceVolumeIndicator(close=sprc_data['sprc_close'], volume=sprc_data['sprc_vol']).on_balance_volume()
sprc_data['bb_upper'], _, sprc_data['bb_lower'] = ta.volatility.bollinger_hband(close=sprc_data['sprc_close'], window=20), ta.volatility.bollinger_mavg(close=sprc_data['sprc_close'], window=20), ta.volatility.bollinger_lband(close=sprc_data['sprc_close'], window=20)

sprc_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 701 entries, 0 to 700
Data columns (total 23 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Datetime        701 non-null    datetime64[ns]
 1   Signal          701 non-null    int64         
 2   sprc_close      701 non-null    float64       
 3   sprc_vol        701 non-null    int64         
 4   usd_close       701 non-null    float64       
 5   cny_close       701 non-null    float64       
 6   set100_close    701 non-null    float64       
 7   set50_close     701 non-null    float64       
 8   gold_close      701 non-null    float64       
 9   resource_close  701 non-null    float64       
 10  energy_close    701 non-null    float64       
 11  brent_close     701 non-null    float64       
 12  crude_close     701 non-null    float64       
 13  irpc_close      701 non-null    float64       
 14  top_close       701 non-null    float64       
 15  bcp_cl

In [44]:
sprc_features = ['sprc_close', 'set50_close', 'irpc_close', 'top_close', 'fincial_close', 'bank_close', 'ema', 'obv', 'bb_upper', 'bb_lower', 'resource_close', 'energy_close']

for column in sprc_features:
    median_value = sprc_data[column].median()
    sprc_data[column].fillna(median_value, inplace=True)

## Normalization

In [45]:
# Load the scaler
sprc_scaler = joblib.load("/content/sprc_scaler_strong_features.pkl")

sprc_scaled = sprc_scaler.fit_transform(sprc_data[sprc_features])
sprc_scaled

array([[0.43478275, 0.80952359, 0.68421072, ..., 0.59648354, 0.6792252 ,
        0.67933057],
       [0.47826119, 0.85714383, 0.78947382, ..., 0.59648354, 0.70828755,
        0.70756258],
       [0.43478275, 0.80952359, 0.78947382, ..., 0.59648354, 0.67168942,
        0.67086318],
       ...,
       [0.69565257, 0.59523934, 0.42105299, ..., 0.72871905, 0.48439217,
        0.48443497],
       [0.69565257, 0.61904719, 0.42105299, ..., 0.73062456, 0.44241158,
        0.44155418],
       [0.69565257, 0.57142922, 0.42105299, ..., 0.73062456, 0.43057082,
        0.4306146 ]])

In [46]:
X_predict = sprc_scaled
y_predict = sprc_signal

# Prediction

In [47]:
sprc_model = joblib.load("/content/sprc_final_model_strong_features.pkl")

y_prob = sprc_model.predict_proba(X_predict)
y_pred = sprc_model.predict(X_predict)



# AUC

In [48]:
test_auc = roc_auc_score(y_predict, y_prob[:, 1]) * 100
print(f"AUC Score: {test_auc:.4f}")

AUC Score: 64.9690
