In [1]:
# Import the modules
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

In [2]:
# Read the CSV file from the Resources folder into a Pandas DataFrame
file_path = Path("ADA-USD.csv")
adaDF = pd.read_csv(file_path)

# Display sample data
adaDF.head() 

# Review the DataFrame
adaDF

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000
1,2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000
2,2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016
3,2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016
4,2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000
...,...,...,...,...,...,...,...
1966,2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379
1967,2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408
1968,2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382
1969,2023-05-24,0.370500,0.370566,0.360984,0.364234,0.364234,175329362


In [3]:
adaDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1971 entries, 0 to 1970
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1971 non-null   object 
 1   Open       1971 non-null   float64
 2   High       1971 non-null   float64
 3   Low        1971 non-null   float64
 4   Close      1971 non-null   float64
 5   Adj Close  1971 non-null   float64
 6   Volume     1971 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 107.9+ KB


In [4]:
# Convert 'date' column to datetime
adaDF['Date'] = pd.to_datetime(adaDF['Date'])

# Extract year, month, and day into separate columns
adaDF['year'] = adaDF['Date'].dt.year.astype(int)
adaDF['month'] = adaDF['Date'].dt.month.astype(int)
adaDF['day'] = adaDF['Date'].dt.day.astype(int)

adaDF.set_index("Date", inplace=True)

# Display the updated DataFrame
adaDF


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000,2018,1,1
2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000,2018,1,2
2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016,2018,1,3
2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016,2018,1,4
2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000,2018,1,5
...,...,...,...,...,...,...,...,...,...
2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379,2023,5,21
2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408,2023,5,22
2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382,2023,5,23
2023-05-24,0.370500,0.370566,0.360984,0.364234,0.364234,175329362,2023,5,24


In [5]:
adaDF["TomorrowClose"] = adaDF["Close"].shift(-1)
adaDF.dropna(inplace=True)
adaDF

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day,TomorrowClose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000,2018,1,1,0.782587
2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000,2018,1,2,1.079660
2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016,2018,1,3,1.114120
2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016,2018,1,4,0.999559
2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000,2018,1,5,1.027150
...,...,...,...,...,...,...,...,...,...,...
2023-05-20,0.368242,0.368382,0.364512,0.366357,0.366357,118748065,2023,5,20,0.360363
2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379,2023,5,21,0.367997
2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408,2023,5,22,0.370484
2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382,2023,5,23,0.364234


In [6]:
adaDF.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1970 entries, 2018-01-01 to 2023-05-24
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           1970 non-null   float64
 1   High           1970 non-null   float64
 2   Low            1970 non-null   float64
 3   Close          1970 non-null   float64
 4   Adj Close      1970 non-null   float64
 5   Volume         1970 non-null   int64  
 6   year           1970 non-null   int64  
 7   month          1970 non-null   int64  
 8   day            1970 non-null   int64  
 9   TomorrowClose  1970 non-null   float64
dtypes: float64(6), int64(4)
memory usage: 169.3 KB


In [7]:
# Separate the data into labels and features

y = adaDF["TomorrowClose"]
X = adaDF.drop(columns=["TomorrowClose"])



In [8]:
# Review the y variable Series
y

Date
2018-01-01    0.782587
2018-01-02    1.079660
2018-01-03    1.114120
2018-01-04    0.999559
2018-01-05    1.027150
                ...   
2023-05-20    0.360363
2023-05-21    0.367997
2023-05-22    0.370484
2023-05-23    0.364234
2023-05-24    0.358662
Name: TomorrowClose, Length: 1970, dtype: float64

In [9]:
# Review the X variable DataFrame
X

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000,2018,1,1
2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000,2018,1,2
2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016,2018,1,3
2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016,2018,1,4
2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000,2018,1,5
...,...,...,...,...,...,...,...,...,...
2023-05-20,0.368242,0.368382,0.364512,0.366357,0.366357,118748065,2023,5,20
2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379,2023,5,21
2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408,2023,5,22
2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382,2023,5,23


In [10]:
# Check the balance of our target values
y.value_counts()

0.041672    2
0.044990    2
0.044777    2
0.059944    2
0.782587    1
           ..
0.039280    1
0.039529    1
0.039225    1
0.038073    1
0.358662    1
Name: TomorrowClose, Length: 1966, dtype: int64

In [11]:
# Import the train_test_learn module
from sklearn.model_selection import train_test_split

# Split the data using train_test_split
# Assign a random_state of 1 to the function

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    )


In [12]:
from sklearn.linear_model import LinearRegression

# Instantiate the Linear Regression model
regressor = LinearRegression()

# Fit the model using training data
regressor.fit(X_train, y_train)

# Predict the future values using test data
y_pred = regressor.predict(X_test)

In [13]:
X_train

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-03-26,0.029438,0.030742,0.029351,0.030727,0.030727,81633746,2020,3,26
2018-09-28,0.086403,0.088327,0.082045,0.084032,0.084032,89113600,2018,9,28
2022-11-22,0.304524,0.315869,0.299104,0.311922,0.311922,303965817,2022,11,22
2023-05-15,0.370787,0.375465,0.367480,0.367482,0.367482,176627170,2023,5,15
2019-09-01,0.044952,0.045357,0.043729,0.044543,0.044543,31928555,2019,9,1
...,...,...,...,...,...,...,...,...,...
2022-11-27,0.313793,0.319191,0.312288,0.312454,0.312454,167898046,2022,11,27
2021-01-01,0.181382,0.184246,0.172022,0.175350,0.175350,1122218004,2021,1,1
2023-04-17,0.451758,0.451758,0.433168,0.434167,0.434167,467653074,2023,4,17
2018-08-24,0.092846,0.094363,0.090948,0.094077,0.094077,35302400,2018,8,24


In [14]:
# Evaluate the model's performance
from sklearn.metrics import mean_squared_error, r2_score

# Calculate the mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the coefficient of determination (R^2 score)
r2 = r2_score(y_test, y_pred)
print("R^2 Score:", r2)


Mean Squared Error: 0.0024584120226889966
R^2 Score: 0.9933746194984082


In [37]:
print("Cardano (ADA)")
highest_value = adaDF['High'].max()
highest_date = adaDF.loc[adaDF['High'] == highest_value, ['month', 'day', 'year']].astype(str).agg('/'.join, axis=1)
rounded_highest_value = round(highest_value, 2)
print("Highest Value: $", rounded_highest_value, "on", highest_date.iloc[0])

lowest_value = adaDF['High'].min()
lowest_date = adaDF.loc[adaDF['High'] == lowest_value, ['month', 'day', 'year']].astype(str).agg('/'.join, axis=1)
rounded_lowest_value = round(lowest_value, 2)
print("Lowest Value: $", rounded_lowest_value, "on", lowest_date.iloc[0])



Cardano (ADA)
Highest Value: $ 3.1 on 9/2/2021
Lowest Value: $ 0.03 on 3/18/2020


In [16]:
import statsmodels.api as sm

model = sm.OLS(y_train,X_train)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          TomorrowClose   R-squared:                       0.994
Model:                            OLS   Adj. R-squared:                  0.994
Method:                 Least Squares   F-statistic:                 3.720e+04
Date:                Fri, 02 Jun 2023   Prob (F-statistic):               0.00
Time:                        15:30:28   Log-Likelihood:                 2468.4
No. Observations:                1477   AIC:                            -4921.
Df Residuals:                    1469   BIC:                            -4878.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Open           0.1582      0.048      3.285      0.0

In [17]:
y_test

Date
2019-11-12    0.043492
2021-04-25    1.234019
2021-03-09    1.135003
2021-06-04    1.659009
2020-10-05    0.092808
                ...   
2018-01-20    0.613163
2021-06-21    1.153254
2022-05-11    0.473746
2019-03-09    0.045698
2022-09-22    0.461821
Name: TomorrowClose, Length: 493, dtype: float64

In [18]:
y_pred

array([0.04415345, 1.08890645, 1.18550886, 1.72630152, 0.09876228,
       1.24582735, 0.08391498, 0.04849961, 0.03194427, 0.04438658,
       0.10673157, 0.09968582, 0.04428348, 0.04362076, 2.40978485,
       0.05986891, 0.03697927, 0.04653831, 0.14120107, 0.39583945,
       0.0393381 , 0.07199668, 0.08783022, 0.51312637, 0.15705049,
       0.12113301, 0.53487398, 1.42133157, 0.37070576, 0.07021873,
       0.04011769, 0.44768844, 0.31865788, 0.51452656, 0.40987661,
       0.13592828, 0.14583735, 1.33593597, 0.99253538, 0.3372788 ,
       2.20562604, 0.08031628, 0.09076689, 0.10399437, 0.04762399,
       1.82666349, 0.09634008, 0.37075572, 1.94618328, 0.08595713,
       0.36317726, 0.0497734 , 0.07197349, 0.05847882, 0.39937802,
       1.04137346, 0.28244032, 0.50230484, 0.09799946, 0.1464184 ,
       2.0239018 , 0.03608221, 0.30673152, 0.18047749, 0.92446581,
       0.14140795, 0.05858903, 0.04959084, 0.32051042, 0.38540813,
       0.10751606, 0.03463528, 0.09481329, 0.04958919, 0.05733

In [19]:
# Make a prediction using the testing data

predictions = regressor.predict(X_test)
predictions

array([0.04415345, 1.08890645, 1.18550886, 1.72630152, 0.09876228,
       1.24582735, 0.08391498, 0.04849961, 0.03194427, 0.04438658,
       0.10673157, 0.09968582, 0.04428348, 0.04362076, 2.40978485,
       0.05986891, 0.03697927, 0.04653831, 0.14120107, 0.39583945,
       0.0393381 , 0.07199668, 0.08783022, 0.51312637, 0.15705049,
       0.12113301, 0.53487398, 1.42133157, 0.37070576, 0.07021873,
       0.04011769, 0.44768844, 0.31865788, 0.51452656, 0.40987661,
       0.13592828, 0.14583735, 1.33593597, 0.99253538, 0.3372788 ,
       2.20562604, 0.08031628, 0.09076689, 0.10399437, 0.04762399,
       1.82666349, 0.09634008, 0.37075572, 1.94618328, 0.08595713,
       0.36317726, 0.0497734 , 0.07197349, 0.05847882, 0.39937802,
       1.04137346, 0.28244032, 0.50230484, 0.09799946, 0.1464184 ,
       2.0239018 , 0.03608221, 0.30673152, 0.18047749, 0.92446581,
       0.14140795, 0.05858903, 0.04959084, 0.32051042, 0.38540813,
       0.10751606, 0.03463528, 0.09481329, 0.04958919, 0.05733