In [102]:
!pip install ta
!pip install yahoo-finance
!pip install pandas
!pip install numpy



In [6]:
# Step 1: Required Libraries
import yfinance as yf
import pandas as pd
import datetime
import numpy as np
import ta

In [8]:
# Step 2: Download Bitcoin data
# Request BTC-USD price history from 2009-01-01 up to today's date.
btc_ticker = 'BTC-USD'
history_start = '2009-01-01'
history_end = datetime.date.today()
btc_data = yf.download(btc_ticker, start=history_start, end=history_end)

[*********************100%%**********************]  1 of 1 completed


In [10]:
# Preview the first rows of the downloaded price data
head_preview = btc_data.head()
print(head_preview)

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2014-09-17  465.864014  468.174011  452.421997  457.334015  457.334015   
2014-09-18  456.859985  456.859985  413.104004  424.440002  424.440002   
2014-09-19  424.102997  427.834991  384.532013  394.795990  394.795990   
2014-09-20  394.673004  423.295990  389.882996  408.903992  408.903992   
2014-09-21  408.084991  412.425995  393.181000  398.821014  398.821014   

              Volume  
Date                  
2014-09-17  21056800  
2014-09-18  34483200  
2014-09-19  37919700  
2014-09-20  36863600  
2014-09-21  26580100  


In [12]:
# Step 3: Create lagged price features
# Each BTC_Close_lag<k> column holds the 'Close' value from k rows earlier.
close_prices = btc_data['Close']
for lag in (1, 2, 3, 4, 5, 10, 20, 30):
    btc_data[f'BTC_Close_lag{lag}'] = close_prices.shift(lag)

In [14]:
# Step 4: Create Moving Average Features
# Each BTC_MA<w> column is the simple rolling mean of 'Close' over w rows.
close_prices = btc_data['Close']
for window in (5, 7, 10, 14, 20, 30, 50, 100, 200):
    btc_data[f'BTC_MA{window}'] = close_prices.rolling(window=window).mean()

In [16]:
# Step 5: Rate of Change (ROC)
# Percentage change of 'Close' relative to its value 10 rows earlier.
roc_fraction = btc_data['Close'].pct_change(periods=10)
btc_data['ROC_10'] = roc_fraction * 100

In [18]:
# Step 6: Relative Strength Index (RSI)
# Row-to-row change in the closing price.
delta = btc_data['Close'].diff()

# Split the change into upward and downward moves; rows that do not match
# the condition (including the NaN first row) are replaced by 0.
is_up = delta > 0
is_down = delta < 0
gain = delta.where(is_up, 0)
loss = -delta.where(is_down, 0)

# Simple rolling means over up to 14 rows (min_periods=1 allows shorter
# windows at the start of the series).
rsi_rolling_kwargs = dict(window=14, min_periods=1)
avg_gain = gain.rolling(**rsi_rolling_kwargs).mean()
avg_loss = loss.rolling(**rsi_rolling_kwargs).mean()

# Relative strength: average gain divided by average loss.
rs = avg_gain / avg_loss

rsi_values = 100 - (100 / (1 + rs))
btc_data['RSI'] = rsi_values

In [20]:
# Data Check - show the first rows, now including the engineered columns
head_preview = btc_data.head()
print(head_preview)

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2014-09-17  465.864014  468.174011  452.421997  457.334015  457.334015   
2014-09-18  456.859985  456.859985  413.104004  424.440002  424.440002   
2014-09-19  424.102997  427.834991  384.532013  394.795990  394.795990   
2014-09-20  394.673004  423.295990  389.882996  408.903992  408.903992   
2014-09-21  408.084991  412.425995  393.181000  398.821014  398.821014   

              Volume  BTC_Close_lag1  BTC_Close_lag2  BTC_Close_lag3  \
Date                                                                   
2014-09-17  21056800             NaN             NaN             NaN   
2014-09-18  34483200      457.334015             NaN             NaN   
2014-09-19  37919700      424.440002      457.334015             NaN   
2014-09-20  36863600      394.795990      424.440002      457.334015   
2014-09-21  26580100      408.903992      394.795

In [22]:
# Data Check - show the most recent rows of the data
tail_preview = btc_data.tail()
print(tail_preview)

                    Open          High           Low         Close  \
Date                                                                 
2024-08-10  60881.230469  61464.511719  60287.566406  60945.812500   
2024-08-11  60944.890625  61778.660156  58348.824219  58719.484375   
2024-08-12  58719.394531  60680.332031  57688.898438  59354.515625   
2024-08-13  59356.207031  61572.398438  58506.253906  60609.566406   
2024-08-14  60611.050781  61687.757812  58472.875000  58737.269531   

               Adj Close       Volume  BTC_Close_lag1  BTC_Close_lag2  \
Date                                                                    
2024-08-10  60945.812500  15745822278    60880.113281    61710.136719   
2024-08-11  58719.484375  22759754812    60945.812500    60880.113281   
2024-08-12  59354.515625  37078637820    58719.484375    60945.812500   
2024-08-13  60609.566406  30327698167    59354.515625    58719.484375   
2024-08-14  58737.269531  29961696180    60609.566406    59354.515625  

In [24]:
# Step 7: Standard Deviation (Volatility)
# Rolling 20-row standard deviation of the 'Close' price.
close_window_20 = btc_data['Close'].rolling(window=20)
btc_data['Volatility'] = close_window_20.std()

In [26]:
# Step 8: Average True Range (ATR)
# True range is the largest of three distances for each row:
#   High - Low, |High - previous Close|, |Low - previous Close|.
# np.maximum is a binary ufunc whose third positional argument is `out`, so the
# three candidates are combined with two nested calls; passing all three to a
# single call would leave |Low - previous Close| out of the comparison.
prev_close = btc_data['Close'].shift(1)
high_low = btc_data['High'] - btc_data['Low']
high_prev_close = np.abs(btc_data['High'] - prev_close)
low_prev_close = np.abs(btc_data['Low'] - prev_close)
# np.maximum propagates NaN, so the first row (no previous close) stays NaN.
btc_data['TR'] = np.maximum(np.maximum(high_low, high_prev_close), low_prev_close)
# ATR here is the simple 14-row rolling mean of the true range.
btc_data['ATR'] = btc_data['TR'].rolling(window=14).mean()

In [28]:
# Step 9: Bollinger Bands
# Middle band: 20-row rolling mean of 'Close'.
# Upper/lower bands: middle band plus/minus two rolling standard deviations.
close_window_20 = btc_data['Close'].rolling(window=20)
band_offset = close_window_20.std() * 2
btc_data['Bollinger_Mid'] = close_window_20.mean()
btc_data['Bollinger_Upper'] = btc_data['Bollinger_Mid'] + band_offset
btc_data['Bollinger_Lower'] = btc_data['Bollinger_Mid'] - band_offset

In [30]:
# Step 10: Exponential Moving Average (EMA)
# EMA<span> columns: exponentially weighted mean of 'Close' (adjust=False).
for ema_span in (12, 26):
    btc_data[f'EMA{ema_span}'] = btc_data['Close'].ewm(span=ema_span, adjust=False).mean()

In [32]:
# Step 11: On-Balance Volume (OBV)
# Row-to-row change in the closing price (NaN on the first row).
btc_data['Daily_Change'] = btc_data['Close'].diff()
# Direction of each move: +1 when the close rose, -1 when it fell, and 0 when
# it was unchanged. The first row has no previous close, so its NaN direction
# is treated as 0 instead of being counted as a down day.
obv_direction = np.sign(btc_data['Daily_Change']).fillna(0)
# OBV is the running total of volume signed by the direction of the move.
btc_data['OBV'] = (obv_direction * btc_data['Volume']).cumsum()

In [34]:
# Step 12: Stochastic Oscillator
# Highest 'High' and lowest 'Low' over the trailing 14 rows.
rolling_high_14 = btc_data['High'].rolling(window=14).max()
rolling_low_14 = btc_data['Low'].rolling(window=14).min()
btc_data['14-High'] = rolling_high_14
btc_data['14-Low'] = rolling_low_14
# Position of the close within the 14-row high/low range, scaled to 0-100.
distance_from_low = btc_data['Close'] - btc_data['14-Low']
high_low_range = btc_data['14-High'] - btc_data['14-Low']
btc_data['Stochastic_Oscillator'] = 100 * (distance_from_low / high_low_range)

In [36]:
# Step 13: Moving Average Convergence Divergence (MACD)
# MACD line: EMA12 minus EMA26 (both columns must already exist).
macd_line = btc_data['EMA12'] - btc_data['EMA26']
# Signal line: span-9 exponentially weighted mean of the MACD line.
signal_line = macd_line.ewm(span=9, adjust=False).mean()
btc_data['MACD_Line'] = macd_line
btc_data['Signal_Line'] = signal_line
# Histogram: gap between the MACD line and its signal line.
btc_data['MACD_Histogram'] = btc_data['MACD_Line'] - btc_data['Signal_Line']

In [38]:
# Data Check - show the most recent rows of the data
tail_preview = btc_data.tail()
print(tail_preview)

                    Open          High           Low         Close  \
Date                                                                 
2024-08-10  60881.230469  61464.511719  60287.566406  60945.812500   
2024-08-11  60944.890625  61778.660156  58348.824219  58719.484375   
2024-08-12  58719.394531  60680.332031  57688.898438  59354.515625   
2024-08-13  59356.207031  61572.398438  58506.253906  60609.566406   
2024-08-14  60611.050781  61687.757812  58472.875000  58737.269531   

               Adj Close       Volume  BTC_Close_lag1  BTC_Close_lag2  \
Date                                                                    
2024-08-10  60945.812500  15745822278    60880.113281    61710.136719   
2024-08-11  58719.484375  22759754812    60945.812500    60880.113281   
2024-08-12  59354.515625  37078637820    58719.484375    60945.812500   
2024-08-13  60609.566406  30327698167    59354.515625    58719.484375   
2024-08-14  58737.269531  29961696180    60609.566406    59354.515625  

In [40]:
# Display basic information about the dataset
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit an extra "None" line after the summary.
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3620 entries, 2014-09-17 to 2024-08-14
Data columns (total 41 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Open                   3620 non-null   float64
 1   High                   3620 non-null   float64
 2   Low                    3620 non-null   float64
 3   Close                  3620 non-null   float64
 4   Adj Close              3620 non-null   float64
 5   Volume                 3620 non-null   int64  
 6   BTC_Close_lag1         3619 non-null   float64
 7   BTC_Close_lag2         3618 non-null   float64
 8   BTC_Close_lag3         3617 non-null   float64
 9   BTC_Close_lag4         3616 non-null   float64
 10  BTC_Close_lag5         3615 non-null   float64
 11  BTC_Close_lag10        3610 non-null   float64
 12  BTC_Close_lag20        3600 non-null   float64
 13  BTC_Close_lag30        3590 non-null   float64
 14  BTC_MA5                3616 non-null  

In [42]:
# Step 14: Parabolic SAR
# Built by ta's PSARIndicator from the High, Low and Close columns.
psar_indicator = ta.trend.PSARIndicator(
    high=btc_data['High'],
    low=btc_data['Low'],
    close=btc_data['Close'],
)
btc_data['PSAR'] = psar_indicator.psar()

  self._psar[i] = high2


In [44]:
# Step 15: Williams %R
# Built by ta's WilliamsRIndicator from High, Low and Close with lbp=14.
williams_indicator = ta.momentum.WilliamsRIndicator(
    high=btc_data['High'],
    low=btc_data['Low'],
    close=btc_data['Close'],
    lbp=14,
)
btc_data['Williams_%R'] = williams_indicator.williams_r()

In [46]:
# Step 16: Commodity Channel Index (CCI)
# Built by ta's CCIIndicator from High, Low and Close with window=20.
cci_indicator = ta.trend.CCIIndicator(
    high=btc_data['High'],
    low=btc_data['Low'],
    close=btc_data['Close'],
    window=20,
)
btc_data['CCI'] = cci_indicator.cci()

In [48]:
# Step 17: Accumulation/Distribution Line (A/D Line)
# Built by ta's AccDistIndexIndicator from High, Low, Close and Volume.
acc_dist_indicator = ta.volume.AccDistIndexIndicator(
    high=btc_data['High'],
    low=btc_data['Low'],
    close=btc_data['Close'],
    volume=btc_data['Volume'],
)
btc_data['AD_Line'] = acc_dist_indicator.acc_dist_index()

In [50]:
# Step 18: Chaikin Money Flow (CMF)
# Built by ta's ChaikinMoneyFlowIndicator from High, Low, Close and Volume,
# using the indicator's default settings.
cmf_indicator = ta.volume.ChaikinMoneyFlowIndicator(
    high=btc_data['High'],
    low=btc_data['Low'],
    close=btc_data['Close'],
    volume=btc_data['Volume'],
)
btc_data['CMF'] = cmf_indicator.chaikin_money_flow()

In [52]:
# Step 19: True Strength Index (TSI)
# Built by ta's TSIIndicator from the Close column with its default settings.
tsi_indicator = ta.momentum.TSIIndicator(close=btc_data['Close'])
btc_data['TSI'] = tsi_indicator.tsi()

In [54]:
# Step 20: Average Directional Index (ADX)
# Built by ta's ADXIndicator from High, Low and Close with window=14.
adx_indicator = ta.trend.ADXIndicator(
    high=btc_data['High'],
    low=btc_data['Low'],
    close=btc_data['Close'],
    window=14,
)
btc_data['ADX'] = adx_indicator.adx()

In [56]:
# Step 21: Price Rate of Change (PROC)
# Percentage change of 'Close' relative to its value 20 rows earlier.
proc_fraction = btc_data['Close'].pct_change(periods=20)
btc_data['PROC_20'] = proc_fraction * 100

In [58]:
# Step 22: Z-Score
# Distance of 'Close' from its 20-row rolling mean, in units of the 20-row
# rolling standard deviation.
close_window_20 = btc_data['Close'].rolling(window=20)
deviation_from_mean = btc_data['Close'] - close_window_20.mean()
btc_data['Z_Score'] = deviation_from_mean / close_window_20.std()

In [60]:
# Step 23: Volume-Based Moving Averages
# Each Volume_MA<w> column is the simple rolling mean of 'Volume' over w rows.
for volume_window in (5, 7, 10, 14, 20, 30):
    btc_data[f'Volume_MA{volume_window}'] = btc_data['Volume'].rolling(window=volume_window).mean()
# Liquidity ratio: current volume relative to its 30-row rolling mean.
volume_mean_30 = btc_data['Volume'].rolling(window=30).mean()
btc_data['Liquidity_Ratio'] = btc_data['Volume'] / volume_mean_30

In [62]:
# Step 24: Log Returns and Cumulative Returns
# Log return: natural log of the ratio of each close to the previous close
# (NaN on the first row, which has no previous close).
btc_data['Log_Returns'] = np.log(btc_data['Close'] / btc_data['Close'].shift(1))
# Log returns are additive, so the cumulative simple return is
# exp(sum of log returns) - 1. Compounding them with (1 + r).cumprod() would
# treat log returns as simple returns and understate the result.
# np.expm1(x) computes exp(x) - 1.
btc_data['Cumulative_Returns'] = np.expm1(btc_data['Log_Returns'].cumsum())

In [64]:
# Step 25: Calculate Sharpe Ratio
# Simple row-to-row return of the closing price.
btc_data['Daily_Return'] = btc_data['Close'].pct_change()

# 7-row rolling window over the daily returns, shared by both statistics.
returns_window_7 = btc_data['Daily_Return'].rolling(window=7)
rolling_mean = returns_window_7.mean()
rolling_std = returns_window_7.std()

# Rolling ratio of mean return to return standard deviation (7 rows);
# no risk-free rate or annualisation factor is applied.
btc_data['Rolling_Sharpe_Ratio'] = rolling_mean.div(rolling_std)

In [66]:
# Display basic information about the dataset
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit an extra "None" line after the summary.
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3620 entries, 2014-09-17 to 2024-08-14
Data columns (total 61 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Open                   3620 non-null   float64
 1   High                   3620 non-null   float64
 2   Low                    3620 non-null   float64
 3   Close                  3620 non-null   float64
 4   Adj Close              3620 non-null   float64
 5   Volume                 3620 non-null   int64  
 6   BTC_Close_lag1         3619 non-null   float64
 7   BTC_Close_lag2         3618 non-null   float64
 8   BTC_Close_lag3         3617 non-null   float64
 9   BTC_Close_lag4         3616 non-null   float64
 10  BTC_Close_lag5         3615 non-null   float64
 11  BTC_Close_lag10        3610 non-null   float64
 12  BTC_Close_lag20        3600 non-null   float64
 13  BTC_Close_lag30        3590 non-null   float64
 14  BTC_MA5                3616 non-null  

In [68]:
# Data Check - show the most recent rows of the data
tail_preview = btc_data.tail()
print(tail_preview)

                    Open          High           Low         Close  \
Date                                                                 
2024-08-10  60881.230469  61464.511719  60287.566406  60945.812500   
2024-08-11  60944.890625  61778.660156  58348.824219  58719.484375   
2024-08-12  58719.394531  60680.332031  57688.898438  59354.515625   
2024-08-13  59356.207031  61572.398438  58506.253906  60609.566406   
2024-08-14  60611.050781  61687.757812  58472.875000  58737.269531   

               Adj Close       Volume  BTC_Close_lag1  BTC_Close_lag2  \
Date                                                                    
2024-08-10  60945.812500  15745822278    60880.113281    61710.136719   
2024-08-11  58719.484375  22759754812    60945.812500    60880.113281   
2024-08-12  59354.515625  37078637820    58719.484375    60945.812500   
2024-08-13  60609.566406  30327698167    59354.515625    58719.484375   
2024-08-14  58737.269531  29961696180    60609.566406    59354.515625  

In [70]:
# Check for duplicates
# Count rows that are exact repeats of an earlier row.
duplicate_row_mask = btc_data.duplicated()
print(duplicate_row_mask.sum())

0


In [72]:
# Check for constant columns
# A column is reported as constant when its standard deviation is exactly 0.
constant_columns = []
for column_name in btc_data.columns:
    if btc_data[column_name].std() == 0:
        constant_columns.append(column_name)
print(f'Constant columns: {constant_columns}')

# Drop constant columns if any
#btc_data = btc_data.drop(columns=constant_columns)

Constant columns: []


In [144]:
# Save the DataFrame to an Excel file
# The index is written as the first column of the sheet.
output_path = 'btc_data_with_features.xlsx'
btc_data.to_excel(output_path, index=True)