In [25]:
# Import the modules
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

In [26]:
# Read the CSV file from the Resources folder into a Pandas DataFrame
file_path = Path("ADA-USD.csv")
adaDF = pd.read_csv(file_path)

# Display sample data
adaDF.head() 

# Review the DataFrame
adaDF

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000
1,2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000
2,2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016
3,2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016
4,2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000
...,...,...,...,...,...,...,...
1966,2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379
1967,2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408
1968,2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382
1969,2023-05-24,0.370500,0.370566,0.360984,0.364234,0.364234,175329362


In [27]:
adaDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1971 entries, 0 to 1970
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1971 non-null   object 
 1   Open       1971 non-null   float64
 2   High       1971 non-null   float64
 3   Low        1971 non-null   float64
 4   Close      1971 non-null   float64
 5   Adj Close  1971 non-null   float64
 6   Volume     1971 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 107.9+ KB


In [28]:
# Convert 'date' column to datetime
adaDF['Date'] = pd.to_datetime(adaDF['Date'])

# Extract year, month, and day into separate columns
adaDF['year'] = adaDF['Date'].dt.year.astype(int)
adaDF['month'] = adaDF['Date'].dt.month.astype(int)
adaDF['day'] = adaDF['Date'].dt.day.astype(int)

adaDF.set_index("Date", inplace=True)

# Display the updated DataFrame
adaDF


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000,2018,1,1
2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000,2018,1,2
2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016,2018,1,3
2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016,2018,1,4
2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000,2018,1,5
...,...,...,...,...,...,...,...,...,...
2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379,2023,5,21
2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408,2023,5,22
2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382,2023,5,23
2023-05-24,0.370500,0.370566,0.360984,0.364234,0.364234,175329362,2023,5,24


In [29]:
adaDF["TomorrowClose"] = adaDF["Close"].shift(-1)
adaDF.dropna(inplace=True)
adaDF

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day,TomorrowClose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000,2018,1,1,0.782587
2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000,2018,1,2,1.079660
2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016,2018,1,3,1.114120
2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016,2018,1,4,0.999559
2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000,2018,1,5,1.027150
...,...,...,...,...,...,...,...,...,...,...
2023-05-20,0.368242,0.368382,0.364512,0.366357,0.366357,118748065,2023,5,20,0.360363
2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379,2023,5,21,0.367997
2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408,2023,5,22,0.370484
2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382,2023,5,23,0.364234


In [30]:
adaDF.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1970 entries, 2018-01-01 to 2023-05-24
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           1970 non-null   float64
 1   High           1970 non-null   float64
 2   Low            1970 non-null   float64
 3   Close          1970 non-null   float64
 4   Adj Close      1970 non-null   float64
 5   Volume         1970 non-null   int64  
 6   year           1970 non-null   int32  
 7   month          1970 non-null   int32  
 8   day            1970 non-null   int32  
 9   TomorrowClose  1970 non-null   float64
dtypes: float64(6), int32(3), int64(1)
memory usage: 146.2 KB


In [31]:
# Separate the data into labels and features

y = adaDF["TomorrowClose"]
X = adaDF.drop(columns=["TomorrowClose"])



In [32]:
# Review the y variable Series
y

Date
2018-01-01    0.782587
2018-01-02    1.079660
2018-01-03    1.114120
2018-01-04    0.999559
2018-01-05    1.027150
                ...   
2023-05-20    0.360363
2023-05-21    0.367997
2023-05-22    0.370484
2023-05-23    0.364234
2023-05-24    0.358662
Name: TomorrowClose, Length: 1970, dtype: float64

In [33]:
# Review the X variable DataFrame
X

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-01,0.718847,0.730051,0.671941,0.728657,0.728657,150186000,2018,1,1
2018-01-02,0.724676,0.794646,0.697856,0.782587,0.782587,289712000,2018,1,2
2018-01-03,0.779681,1.085670,0.778578,1.079660,1.079660,657398016,2018,1,3
2018-01-04,1.094030,1.327210,1.037650,1.114120,1.114120,593430016,2018,1,4
2018-01-05,1.171150,1.252420,0.903503,0.999559,0.999559,508100000,2018,1,5
...,...,...,...,...,...,...,...,...,...
2023-05-20,0.368242,0.368382,0.364512,0.366357,0.366357,118748065,2023,5,20
2023-05-21,0.366352,0.367398,0.358336,0.360363,0.360363,116595379,2023,5,21
2023-05-22,0.360382,0.373213,0.357263,0.367997,0.367997,177956408,2023,5,22
2023-05-23,0.368004,0.375178,0.366596,0.370484,0.370484,141912382,2023,5,23


In [34]:
# Check the balance of our target values
y.value_counts()

TomorrowClose
0.041672    2
0.044990    2
0.044777    2
0.059944    2
0.782587    1
           ..
0.039280    1
0.039529    1
0.039225    1
0.038073    1
0.358662    1
Name: count, Length: 1966, dtype: int64

In [35]:
# Import the train_test_learn module
from sklearn.model_selection import train_test_split

# Split the data using train_test_split
# Assign a random_state of 1 to the function

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    )


In [36]:
from sklearn.linear_model import LinearRegression

# Instantiate the Linear Regression model
regressor = LinearRegression()

# Fit the model using training data
regressor.fit(X_train, y_train)

# Predict the future values using test data
y_pred = regressor.predict(X_test)

In [37]:
X_train

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-03-26,0.029438,0.030742,0.029351,0.030727,0.030727,81633746,2020,3,26
2018-09-28,0.086403,0.088327,0.082045,0.084032,0.084032,89113600,2018,9,28
2022-11-22,0.304524,0.315869,0.299104,0.311922,0.311922,303965817,2022,11,22
2023-05-15,0.370787,0.375465,0.367480,0.367482,0.367482,176627170,2023,5,15
2019-09-01,0.044952,0.045357,0.043729,0.044543,0.044543,31928555,2019,9,1
...,...,...,...,...,...,...,...,...,...
2022-11-27,0.313793,0.319191,0.312288,0.312454,0.312454,167898046,2022,11,27
2021-01-01,0.181382,0.184246,0.172022,0.175350,0.175350,1122218004,2021,1,1
2023-04-17,0.451758,0.451758,0.433168,0.434167,0.434167,467653074,2023,4,17
2018-08-24,0.092846,0.094363,0.090948,0.094077,0.094077,35302400,2018,8,24


In [38]:
# Evaluate the model's performance
from sklearn.metrics import mean_squared_error, r2_score

# Calculate the mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the coefficient of determination (R^2 score)
r2 = r2_score(y_test, y_pred)
print("R^2 Score:", r2)


Mean Squared Error: 0.002458411849131973
R^2 Score: 0.9933746199661416


In [39]:
print("Cardano (ADA)")
highest_value = adaDF['High'].max()
highest_date = adaDF.loc[adaDF['High'] == highest_value, ['month', 'day', 'year']].astype(str).agg('/'.join, axis=1)
rounded_highest_value = round(highest_value, 2)
print("Highest Value: $", rounded_highest_value, "on", highest_date.iloc[0])

lowest_value = adaDF['High'].min()
lowest_date = adaDF.loc[adaDF['High'] == lowest_value, ['month', 'day', 'year']].astype(str).agg('/'.join, axis=1)
rounded_lowest_value = round(lowest_value, 2)
print("Lowest Value: $", rounded_lowest_value, "on", lowest_date.iloc[0])



Cardano (ADA)
Highest Value: $ 3.1 on 9/2/2021
Lowest Value: $ 0.03 on 3/18/2020


In [40]:
import statsmodels.api as sm

model = sm.OLS(y_train,X_train)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          TomorrowClose   R-squared:                       0.994
Model:                            OLS   Adj. R-squared:                  0.994
Method:                 Least Squares   F-statistic:                 3.720e+04
Date:                Sat, 03 Jun 2023   Prob (F-statistic):               0.00
Time:                        13:13:25   Log-Likelihood:                 2468.4
No. Observations:                1477   AIC:                            -4921.
Df Residuals:                    1469   BIC:                            -4878.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Open           0.1582      0.048      3.285      0.0

In [41]:
y_test

Date
2019-11-12    0.043492
2021-04-25    1.234019
2021-03-09    1.135003
2021-06-04    1.659009
2020-10-05    0.092808
                ...   
2018-01-20    0.613163
2021-06-21    1.153254
2022-05-11    0.473746
2019-03-09    0.045698
2022-09-22    0.461821
Name: TomorrowClose, Length: 493, dtype: float64

In [42]:
y_pred

array([0.04415344, 1.08890646, 1.18550889, 1.72630154, 0.09876227,
       1.24582737, 0.08391497, 0.0484996 , 0.03194426, 0.04438657,
       0.10673157, 0.09968582, 0.04428347, 0.04362075, 2.40978488,
       0.0598689 , 0.03697927, 0.04653831, 0.14120107, 0.39583945,
       0.0393381 , 0.07199667, 0.08783021, 0.51312636, 0.15705049,
       0.121133  , 0.53487398, 1.42133158, 0.37070576, 0.07021873,
       0.04011768, 0.44768844, 0.31865788, 0.51452656, 0.40987661,
       0.13592828, 0.14583734, 1.33593597, 0.99253538, 0.33727879,
       2.20562606, 0.08031627, 0.09076689, 0.10399437, 0.04762398,
       1.82666351, 0.09634008, 0.37075571, 1.9461833 , 0.08595712,
       0.36317727, 0.04977339, 0.07197348, 0.05847882, 0.39937802,
       1.04137348, 0.28244032, 0.50230484, 0.09799946, 0.14641839,
       2.02390182, 0.0360822 , 0.30673154, 0.18047749, 0.92446583,
       0.14140794, 0.05858902, 0.04959083, 0.32051041, 0.38540812,
       0.10751605, 0.03463527, 0.09481329, 0.04958918, 0.05733

In [43]:
# Make a prediction using the testing data

predictions = regressor.predict(X_test)
predictions

array([0.04415344, 1.08890646, 1.18550889, 1.72630154, 0.09876227,
       1.24582737, 0.08391497, 0.0484996 , 0.03194426, 0.04438657,
       0.10673157, 0.09968582, 0.04428347, 0.04362075, 2.40978488,
       0.0598689 , 0.03697927, 0.04653831, 0.14120107, 0.39583945,
       0.0393381 , 0.07199667, 0.08783021, 0.51312636, 0.15705049,
       0.121133  , 0.53487398, 1.42133158, 0.37070576, 0.07021873,
       0.04011768, 0.44768844, 0.31865788, 0.51452656, 0.40987661,
       0.13592828, 0.14583734, 1.33593597, 0.99253538, 0.33727879,
       2.20562606, 0.08031627, 0.09076689, 0.10399437, 0.04762398,
       1.82666351, 0.09634008, 0.37075571, 1.9461833 , 0.08595712,
       0.36317727, 0.04977339, 0.07197348, 0.05847882, 0.39937802,
       1.04137348, 0.28244032, 0.50230484, 0.09799946, 0.14641839,
       2.02390182, 0.0360822 , 0.30673154, 0.18047749, 0.92446583,
       0.14140794, 0.05858902, 0.04959083, 0.32051041, 0.38540812,
       0.10751605, 0.03463527, 0.09481329, 0.04958918, 0.05733

In [44]:
X_test

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-11-12,0.043257,0.044631,0.043227,0.043767,0.043767,79520150,2019,11,12
2021-04-25,1.102703,1.146124,1.022775,1.088250,1.088250,2465672138,2021,4,25
2021-03-09,1.118145,1.214923,1.113320,1.195161,1.195161,5317733185,2021,3,9
2021-06-04,1.846715,1.848485,1.609734,1.710908,1.710908,4583062567,2021,6,4
2020-10-05,0.096404,0.098719,0.096325,0.097544,0.097544,625952257,2020,10,5
...,...,...,...,...,...,...,...,...,...
2018-01-20,0.656564,0.726242,0.647644,0.707510,0.707510,738798976,2018,1,20
2021-06-21,1.427028,1.436257,1.170604,1.177480,1.177480,3987752749,2021,6,21
2022-05-11,0.628772,0.657571,0.484610,0.512800,0.512800,2692548110,2022,5,11
2019-03-09,0.042686,0.046951,0.042656,0.046728,0.046728,52185658,2019,3,9


['../../Reginald/Webapp/ml/models/model_ada.joblib']