# KuCoin spot trading bot: fetch OHLCV data, then train and test a supervised model

# In [None]:
import csv
import logging
import os
import pickle
import traceback
from datetime import datetime
from time import sleep

import ccxt
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from finta import TA
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

load_dotenv()

class TradingBot:
    """Train a random-forest classifier on KuCoin OHLCV candles and place orders.

    The bot fetches candles and an order-book snapshot through ccxt, scales the
    candle features, tunes a ``RandomForestClassifier`` with ``BayesSearchCV``
    and places limit orders according to the predicted class
    (0 = bullish, 1 = bearish).
    """

    def __init__(self, symbol, amount=0.1, take_profit_percentage=1.35,
                 stop_loss_percentage=1.35, leverage=None):
        """Create the exchange client and initialise trading parameters.

        Args:
            symbol: Market symbol passed to ccxt, e.g. ``'BTC/USDT'``.
            amount: Order size used by ``create_order_with_percentage_levels``.
            take_profit_percentage: Take-profit distance from entry, in percent.
            stop_loss_percentage: Stop-loss distance from entry, in percent.
            leverage: Optional value forwarded as the ``leverage`` order
                parameter; omitted from the order when ``None``.
        """
        load_dotenv()
        self.symbol = symbol
        self.amount = amount
        self.take_profit_percentage = take_profit_percentage
        self.stop_loss_percentage = stop_loss_percentage
        self.leverage = leverage
        self.logger = logging.getLogger(__name__)
        self.exchange = ccxt.kucoin({
            'apiKey': os.getenv('API_KEY'),
            'secret': os.getenv('SECRET_KEY'),
            'password': os.getenv('PASSPHRASE'),
            'enableRateLimit': True
        })

        # Populated by train_models(); read back by predict_market_direction().
        self.X_train = None
        self.y_train = None
        self.X_val = None
        self.y_val = None

    def handle_exception(self, message):
        """Report a recoverable error without interrupting the caller."""
        # The rest of the class reports through print(), so stay consistent.
        print(message)

    def calculate_atr(self, data, period=14):
        """Compute and print the Average True Range of the supplied candles.

        Args:
            data: Sequence of candle rows whose first five values are
                timestamp (ms), open, high, low, close. Extra trailing values
                (such as volume) are ignored.
            period: ATR look-back period passed to ``finta.TA.ATR``.

        Returns:
            The latest ATR value, or ``None`` when ``data`` is empty.
        """
        columns = ["timestamp", "open", "high", "low", "close"]
        if data is None or len(data) == 0:
            print("ATR Analysis: no candle data available.")
            return None

        # Rows used elsewhere in this class are six values wide; passing them
        # unchanged to a five-column DataFrame raises, so keep only OHLC.
        rows = [list(row[:len(columns)]) for row in data]
        df = pd.DataFrame(rows, columns=columns)
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        df.set_index('timestamp', inplace=True)
        df['ATR'] = TA.ATR(df, period)
        latest_atr = df['ATR'].iloc[-1]

        # Print ATR analysis
        print("ATR Analysis:")
        print("--------------")
        print("Latest ATR value:", latest_atr)
        print("ATR Percentile Analysis:")
        print("Percentiles: 10% - 90%")
        print(df['ATR'].describe(percentiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]))
        print("=" * 50)  # Add a separator
        return latest_atr

    def save_data_to_csv(self, data, filename):
        """Append candle rows to ``filename``, writing a header for a new file.

        Args:
            data: Iterable of candle rows (timestamp, open, high, low, close
                and, when present, volume).
            filename: Path of the CSV file; opened in append mode.
        """
        all_columns = ["timestamp", "open", "high", "low", "close", "volume"]
        rows = list(data)
        # Match the header width to the rows so columns line up.
        width = len(rows[0]) if rows else len(all_columns)
        header = all_columns[:width]
        with open(filename, 'a', newline='') as csv_file:
            writer = csv.writer(csv_file)
            # Seek explicitly so tell() reports the real size in append mode.
            csv_file.seek(0, os.SEEK_END)
            if csv_file.tell() == 0:
                writer.writerow(header)
            writer.writerows(rows)

    def fetch_order_book(self, symbol='BTC/USDT'):
        """Fetch the order book for ``symbol`` as NumPy arrays.

        Returns:
            A ``(bids, asks)`` tuple of arrays. When the request fails the
            error is reported through ``handle_exception`` and two empty
            ``(0, 2)`` arrays are returned so callers can still unpack.
        """
        try:
            order_book = self.exchange.fetch_order_book(symbol)
            bids = np.array(order_book['bids'])  # Convert to NumPy array
            asks = np.array(order_book['asks'])  # Convert to NumPy array
            return bids, asks
        except ccxt.NetworkError as e:
            self.handle_exception(f"Network error: {e}")
        except ccxt.ExchangeError as e:
            self.handle_exception(f"Exchange error: {e}")
        except Exception as e:
            self.handle_exception(f"An unexpected error occurred: {e}")
        return np.empty((0, 2)), np.empty((0, 2))

    def calculate_best_bids_asks(self, bids, asks, num_levels=5):
        """Return the ``num_levels`` highest bids and lowest asks.

        Args:
            bids: Sequence of levels whose first element is the price.
            asks: Sequence of levels whose first element is the price.
            num_levels: Number of levels to keep from each side.

        Returns:
            ``(best_bids, best_asks)`` as lists; bids sorted by price
            descending, asks sorted by price ascending.
        """
        sorted_bids = sorted(bids, key=lambda level: level[0], reverse=True)
        sorted_asks = sorted(asks, key=lambda level: level[0])
        return sorted_bids[:num_levels], sorted_asks[:num_levels]

    @staticmethod
    def _label_candle(candle):
        """Return 1 when a six-value candle closed below its open, else 0."""
        if len(candle) != 6:
            return 0
        open_price, close_price = candle[1], candle[4]
        if not isinstance(open_price, (int, float)) or not isinstance(close_price, (int, float)):
            return 0
        return 1 if float(close_price) < float(open_price) else 0

    def _fetch_all_ohlcv(self, timeframe, limit):
        """Page through ``fetch_ohlcv`` until no newer candles are returned."""
        candles = []
        since = None
        while True:
            page = self.exchange.fetch_ohlcv(self.symbol, timeframe, since=since, limit=limit)
            # Ignore anything older than the cursor so a response that does not
            # honour ``since`` cannot make the loop spin forever.
            fresh = [list(candle) for candle in page if since is None or candle[0] >= since]
            if not fresh:
                break
            candles.extend(fresh)
            since = fresh[-1][0] + 1
            # A short page means there is nothing further to request.
            if limit is not None and len(page) < limit:
                break
        return candles

    def _report_order_book_snapshot(self, ohlcv_features, num_levels=5):
        """Print the shapes of the best bid/ask levels next to recent candles.

        This is diagnostic only: the combined array is not fed to the model,
        so a missing or short order book must not abort preprocessing.
        """
        bids, asks = self.fetch_order_book(self.symbol)
        best_bids, best_asks = self.calculate_best_bids_asks(bids, asks, num_levels=num_levels)
        best_bids = np.array(best_bids)
        best_asks = np.array(best_asks)

        print("Shapes - ohlcv_features:", ohlcv_features.shape)
        print("Shapes - best_bids:", best_bids.shape)
        print("Shapes - best_asks:", best_asks.shape)

        recent = ohlcv_features[-num_levels:]
        rows_match = len(recent) == len(best_bids) == len(best_asks)
        if best_bids.ndim == 2 and best_asks.ndim == 2 and rows_match:
            concatenated_features = np.concatenate((recent, best_bids, best_asks), axis=1)
            print(f"Shape of concatenated features: {concatenated_features.shape}")
        else:
            print("Order book snapshot skipped: row counts do not line up.")

    def fetch_data_and_preprocess(self, timeframe='15m', limit=1000):
        """Fetch candles, clean and scale them, and build the training target.

        Rows that are not six values wide, rows containing NaN and rows with
        any scaled feature three or more standard deviations from the mean are
        dropped. The surviving rows are appended to ``RF_btc.csv``.

        Args:
            timeframe: Candle timeframe passed to ``fetch_ohlcv``.
            limit: Page size passed to ``fetch_ohlcv``.

        Returns:
            ``(data, scaled_features, scaler, target)`` where ``data`` is the
            list of surviving candle rows, ``scaled_features`` the matching
            min-max scaled array, ``scaler`` the fitted ``MinMaxScaler`` and
            ``target`` an array of 0/1 labels. On failure all four are
            ``None``.
        """
        try:
            raw_candles = self._fetch_all_ohlcv(timeframe, limit)

            print("=" * 50)
            print(f"Number of data points before preprocessing: {len(raw_candles)}")

            # Keep data and features aligned row-for-row by discarding
            # malformed rows up front instead of only skipping them in features.
            data = []
            for candle in raw_candles:
                if len(candle) == 6:
                    data.append(candle)
                else:
                    print("Mismatched columns in data:", candle)

            if not data:
                print("No OHLCV data available for preprocessing.")
                return None, None, None, None

            # dtype=float turns missing (None) values into NaN for the filter below.
            features = np.array(data, dtype=float)
            print(f"Shape of features: {features.shape}")

            self._report_order_book_snapshot(features)

            print(f"Number of features before removing incomplete rows: {features.shape[0]}")
            complete_rows = ~np.isnan(features).any(axis=1)
            if not complete_rows.all():
                data = [row for row, keep in zip(data, complete_rows) if keep]
                features = features[complete_rows]
            print(f"Number of features after removing incomplete rows: {features.shape[0]}")

            scaler = MinMaxScaler()
            scaled_features = scaler.fit_transform(features)

            print(f"Mean values of scaled features: {np.mean(scaled_features, axis=0)}")
            print(f"Standard deviation of scaled features: {np.std(scaled_features, axis=0)}")
            print(f"Min values of scaled features: {np.min(scaled_features, axis=0)}")
            print(f"Max values of scaled features: {np.max(scaled_features, axis=0)}")
            print(f"Range of features: {np.max(scaled_features, axis=0) - np.min(scaled_features, axis=0)}")

            # A constant column yields NaN z-scores; treat those as "not an
            # outlier" so a flat feature does not discard every row.
            z_scores = np.nan_to_num(np.abs(stats.zscore(scaled_features)))
            threshold = 3
            outlier_mask = (z_scores < threshold).all(axis=1)
            print(f"Number of features after removing outliers: {np.sum(outlier_mask)}")

            data = [row for row, keep in zip(data, outlier_mask) if keep]
            scaled_features = scaled_features[outlier_mask]

            # NOTE(review): the label is derived from the same candle's open and
            # close, which are also model features - confirm this is intended.
            target = np.array([self._label_candle(row) for row in data])

            self.save_data_to_csv(data, 'RF_btc.csv')  # Updated filename to reflect Random Forest
            return data, scaled_features, scaler, target
        except Exception as e:
            print(f"An error occurred while fetching and preprocessing data: {e}")
            traceback.print_exc()
            return None, None, None, None

    def train_models(self, features, target):
        """Tune, evaluate and persist a random-forest classifier.

        The data is split 80/20 into training and validation sets (stored on
        the instance), hyperparameters are searched with ``BayesSearchCV`` and
        the best estimator is pickled to ``15mincheck15minochlvmodel.pkl``.

        Args:
            features: 2-D array of scaled features.
            target: 1-D array of class labels aligned with ``features``.

        Returns:
            The best fitted estimator, or ``None`` when training fails.
        """
        try:
            if features is None or target is None or len(features) == 0:
                print("No training data available.")
                return None

            print("Features:")
            print(features[:5])  # Print the first 5 rows for inspection
            print("Target:")
            print(target[:5])  # Print the first 5 elements for inspection

            self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
                features, target, test_size=0.2, random_state=42)
            print("Shapes after train_test_split:")
            print("X_train:", self.X_train.shape)
            print("X_val:", self.X_val.shape)
            print("y_train:", self.y_train.shape)
            print("y_val:", self.y_val.shape)

            # Create a random forest classifier
            random_forest = RandomForestClassifier(random_state=42)

            # Define a hyperparameter search space for RandomForestClassifier
            param_space = {
                'n_estimators': Integer(50, 200),
                'max_depth': Integer(1, 30),
                'min_samples_split': Integer(2, 10),
                'min_samples_leaf': Integer(1, 4),
                'criterion': Categorical(['gini', 'entropy']),
                'max_features': Categorical(['sqrt', 'log2', None]),
            }

            # Use BayesSearchCV for dynamic hyperparameter tuning
            bayes_search = BayesSearchCV(random_forest, param_space, cv=5, n_iter=30, n_jobs=-1)
            bayes_search.fit(self.X_train, self.y_train)

            best_random_forest = bayes_search.best_estimator_
            print("Best Hyperparameters:", bayes_search.best_params_)

            y_pred = best_random_forest.predict(self.X_val)

            print("Classification Report:")
            print(classification_report(self.y_val, y_pred))
            print("Confusion Matrix:")
            print(confusion_matrix(self.y_val, y_pred))

            accuracy_train = best_random_forest.score(self.X_train, self.y_train)
            print(f"Accuracy on Training Set: {accuracy_train:.4f}")
            accuracy_val = best_random_forest.score(self.X_val, self.y_val)
            print(f"Accuracy on Validation Set: {accuracy_val:.4f}")

            # Read the averages from the structured report; slicing the text
            # report picked up the "accuracy" row and broke the formatting.
            report = classification_report(self.y_val, y_pred, output_dict=True)
            weighted = report['weighted avg']
            print(f"Weighted Average Precision: {weighted['precision']:.4f}")
            print(f"Weighted Average Recall: {weighted['recall']:.4f}")
            print(f"Weighted Average F1-Score: {weighted['f1-score']:.4f}")

            print("Random Forest Model Training Completed")

            print("Random Forest Model:", best_random_forest)  # Print the models for debugging
            with open('15mincheck15minochlvmodel.pkl', 'wb') as f:
                pickle.dump(best_random_forest, f)

            return best_random_forest

        except Exception as e:
            print(f"An error occurred while training models: {e}")
            traceback.print_exc()
            return None

    def predict_market_direction(self, data, rf_model, scaler):
        """Predict the class of the most recent candle in ``data``.

        Args:
            data: Sequence of candle rows; only rows with six values are used.
            rf_model: Fitted classifier exposing ``predict`` and ``score``.
            scaler: Fitted scaler exposing ``transform``.

        Returns:
            The prediction for the last usable row (0 for bullish, 1 for
            bearish), or ``None`` when there is no usable data or an error
            occurs.
        """
        try:
            rows = [d[0:6] for d in (data if data is not None else []) if len(d) == 6]
            if not rows:
                print("No valid data available for prediction.")
                return None
            features = np.array(rows, dtype=float)
            print("features:", features.shape)

            scaled_features = scaler.transform(features)
            print("scaled_features:", scaled_features.shape)

            # The stored splits only exist after train_models() has succeeded.
            if self.X_train is not None and self.X_val is not None:
                rf_accuracy_train = rf_model.score(self.X_train, self.y_train)
                rf_accuracy_val = rf_model.score(self.X_val, self.y_val)
                print(f"Accuracy of Random Forest Model on Training Set: {rf_accuracy_train:.4f}")
                print(f"Accuracy of Random Forest Model on Validation Set: {rf_accuracy_val:.4f}")

            rf_prediction = rf_model.predict(scaled_features)
            print("Random Forest Model Prediction on Supplied Data:")
            print(rf_prediction)
            print("Length of rf_prediction:", len(rf_prediction))

            final_prediction = rf_prediction[-1]
            print("Final Prediction (0 for Bullish, 1 for Bearish):")
            print(final_prediction)
            return final_prediction
        except Exception as e:
            print(f"An error occurred while predicting market direction: {e}")
            return None

    def _protective_prices(self, side, entry_price):
        """Return ``(take_profit_price, stop_loss_price)`` for an entry.

        For a ``'buy'`` entry the take-profit sits above and the stop-loss
        below the entry price; for any other side the directions are mirrored.
        """
        take_profit_fraction = self.take_profit_percentage / 100
        stop_loss_fraction = self.stop_loss_percentage / 100
        if side == 'buy':
            return (entry_price * (1 + take_profit_fraction),
                    entry_price * (1 - stop_loss_fraction))
        return (entry_price * (1 - take_profit_fraction),
                entry_price * (1 + stop_loss_fraction))

    def create_order_with_percentage_levels(self, side, entry_price):
        """Place an entry order plus stop-loss and take-profit limit orders.

        Args:
            side: ``'buy'`` or ``'sell'`` for the entry order; the two exit
                orders use the opposite side.
            entry_price: Limit price of the entry order.

        Returns:
            ``(main_order, stop_loss_order, take_profit_order)`` as returned by
            the exchange client, or ``(None, None, None)`` on any error.
        """
        try:
            take_profit_price, stop_loss_price = self._protective_prices(side, entry_price)
            exit_side = 'sell' if side == 'buy' else 'buy'

            order_params = {
                'postOnly': True,
                'timeInForce': 'GTC',
            }
            leverage = getattr(self, 'leverage', None)
            if leverage is not None:
                order_params['leverage'] = leverage

            main_order = self.exchange.create_order(
                self.symbol,
                type='limit',
                side=side,
                amount=self.amount,
                price=entry_price,
                params=order_params
            )
            print("Main Order Created:", main_order)

            # NOTE(review): the stop-loss is submitted as a plain limit order,
            # not a trigger/stop order - confirm the exchange handles it as
            # intended before trading live.
            stop_loss_order = self.exchange.create_order(
                self.symbol,
                type='limit',
                side=exit_side,
                amount=self.amount,
                price=stop_loss_price
            )
            print("Stop-Loss Order Created:", stop_loss_order)

            take_profit_order = self.exchange.create_order(
                self.symbol,
                type='limit',
                side=exit_side,
                amount=self.amount,
                price=take_profit_price
            )
            print("Take-Profit Order Created:", take_profit_order)
            return main_order, stop_loss_order, take_profit_order
        except Exception as e:
            print(f"Error creating orders with percentage-based levels: {e}")
            return None, None, None

    def create_limit_order(self, side, entry_price, trading_amount):
        """Place a single GTC limit order and log its details.

        Args:
            side: ``'buy'`` or ``'sell'``.
            entry_price: Limit price of the order.
            trading_amount: Order size.

        Returns:
            The created order as returned by the exchange client, or ``None``
            when the request fails.
        """
        # Fall back to a module logger so error handling itself cannot fail
        # on an instance that was built without one.
        logger = getattr(self, 'logger', None) or logging.getLogger(__name__)
        try:
            main_order = self.exchange.create_order(
                self.symbol,
                type='limit',
                side=side,
                amount=trading_amount,
                price=entry_price,
                params={
                    'postOnly': False,
                    'timeInForce': 'GTC',
                    # You can add more parameters as needed
                }
            )

            # Fetch order details for confirmation
            order_details = self.exchange.fetch_order(main_order['id'], self.symbol)

            logger.info(f"Main Order Created: {main_order}")
            logger.info(f"Order Details: {order_details}")
            return main_order

        except ccxt.NetworkError as e:
            logger.error(f"Network error: {e}")
        except ccxt.ExchangeError as e:
            logger.error(f"Exchange error: {e}")
        except Exception as e:
            logger.error(f"An error occurred: {e}")
        return None

    def calculate_midpoint_entry(self, best_bid, best_ask):
        """Return the midpoint between the best bid and the best ask."""
        return (best_bid + best_ask) / 2.0

    def main_trading_loop(self):
        """Repeatedly fetch data, retrain, predict and place orders.

        Recoverable failures (no data, failed training, failed prediction,
        missing quote) pause for 30 seconds and retry. Any unexpected
        exception is printed and, after a 30 second pause, ends the loop.
        """
        loop_start_time = datetime.now()
        try:
            while True:
                elapsed_seconds = (datetime.now() - loop_start_time).total_seconds()
                if elapsed_seconds < 10:
                    remaining = 10 - int(elapsed_seconds)
                    print(f"Waiting for {remaining} seconds before creating orders...")
                    sleep(remaining)

                data, scaled_features, scaler, target = self.fetch_data_and_preprocess()
                if data is None:
                    print("Error: Fetching and preprocessing data failed.")
                    sleep(30)
                    continue

                # Calculate ATR
                atr_value = self.calculate_atr(data)

                # Train the random forest model
                rf_model = self.train_models(scaled_features, target)
                print("Shape of features in the main trading loop:", scaled_features.shape)

                if rf_model is None:
                    print("Error: Training the random forest model failed.")
                    sleep(30)
                    continue

                print("Random Forest model was successfully trained.")
                ticker = self.exchange.fetch_ticker(self.symbol)
                bid, ask = ticker['bid'], ticker['ask']
                if bid is None or ask is None:
                    print("Error: Ticker did not include bid/ask prices.")
                    sleep(30)
                    continue
                midpoint = self.calculate_midpoint_entry(bid, ask)
                current_time = datetime.now()

                market_direction = self.predict_market_direction(data, rf_model, scaler)
                if market_direction is None:
                    print("Error: Prediction failed.")
                    print("=" * 50)
                    sleep(30)
                    continue

                print("The market is ---> {}".format(market_direction))
                print(current_time.strftime("%B %d, %Y %I:%M %p"))
                print("=" * 50)

                if market_direction == 0:  # Bullish
                    side = 'buy'
                    suggested_limit_price = midpoint - 0.01
                else:  # Bearish
                    side = 'sell'
                    suggested_limit_price = midpoint + 0.01

                # Use atr_value as needed in your logic
                print("ATR Value:", atr_value)

                # Creating orders with a single level
                # NOTE(review): every successful cycle retrains and places a new
                # set of orders with no pause in between - confirm intended.
                self.create_order_with_percentage_levels(side, suggested_limit_price)

                tp_price, sl_price = self._protective_prices(side, suggested_limit_price)
                print(f"Take-Profit Price: {tp_price}, Stop-Loss Price: {sl_price}")
                print("\n" + "-" * 50)

        except Exception as e:
            print(f"An error occurred in the main trading loop: {e}")
            sleep(30)

if __name__ == "__main__":
    # Script entry point: build the bot for BTC/USDT and run it until it stops.
    # INFO level makes logger.info() order confirmations visible on the console.
    logging.basicConfig(level=logging.INFO)

    trading_bot = TradingBot(symbol='BTC/USDT')
    # Configure trade size and exit levels explicitly on the instance.
    trading_bot.amount = 0.1
    trading_bot.take_profit_percentage = 1.35
    trading_bot.stop_loss_percentage = 1.35

    trading_bot.main_trading_loop()
