In [1]:
# Import the modules
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

In [2]:
# Read the CSV file from the Resources folder into a Pandas DataFrame
file_path = Path("LTC-USD.csv")
adaDF = pd.read_csv(file_path)

# Display sample data
adaDF.head() 

# Review the DataFrame
adaDF

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2015-01-01,2.724210,2.724210,2.689770,2.699050,2.699050,770693
1,2015-01-02,2.697430,2.699240,2.663850,2.667360,2.667360,855392
2,2015-01-03,2.666460,2.666460,2.133160,2.133160,2.133160,5193080
3,2015-01-04,2.110220,2.154460,1.912410,1.956680,1.956680,3888140
4,2015-01-05,1.958030,2.160480,1.958030,2.082180,2.082180,10649500
...,...,...,...,...,...,...,...
3062,2023-05-21,92.377602,93.296150,91.468597,92.278870,92.278870,365938794
3063,2023-05-22,92.277184,92.659637,90.364304,90.946075,90.946075,396591248
3064,2023-05-23,90.948753,92.988068,90.690895,91.632332,91.632332,384556801
3065,2023-05-24,91.636017,91.792946,85.111000,86.000366,86.000366,771426876


In [3]:
adaDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3067 entries, 0 to 3066
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3067 non-null   object 
 1   Open       3067 non-null   float64
 2   High       3067 non-null   float64
 3   Low        3067 non-null   float64
 4   Close      3067 non-null   float64
 5   Adj Close  3067 non-null   float64
 6   Volume     3067 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 167.9+ KB


In [4]:

# Convert 'date' column to datetime
adaDF['Date'] = pd.to_datetime(adaDF['Date'])

# Extract year, month, and day into separate columns
adaDF['year'] = adaDF['Date'].dt.year.astype(int)
adaDF['month'] = adaDF['Date'].dt.month.astype(int)
adaDF['day'] = adaDF['Date'].dt.day.astype(int)

adaDF.set_index("Date", inplace=True)

# Display the updated DataFrame
adaDF


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-01,2.724210,2.724210,2.689770,2.699050,2.699050,770693,2015,1,1
2015-01-02,2.697430,2.699240,2.663850,2.667360,2.667360,855392,2015,1,2
2015-01-03,2.666460,2.666460,2.133160,2.133160,2.133160,5193080,2015,1,3
2015-01-04,2.110220,2.154460,1.912410,1.956680,1.956680,3888140,2015,1,4
2015-01-05,1.958030,2.160480,1.958030,2.082180,2.082180,10649500,2015,1,5
...,...,...,...,...,...,...,...,...,...
2023-05-21,92.377602,93.296150,91.468597,92.278870,92.278870,365938794,2023,5,21
2023-05-22,92.277184,92.659637,90.364304,90.946075,90.946075,396591248,2023,5,22
2023-05-23,90.948753,92.988068,90.690895,91.632332,91.632332,384556801,2023,5,23
2023-05-24,91.636017,91.792946,85.111000,86.000366,86.000366,771426876,2023,5,24


In [5]:
adaDF["TomorrowClose"] = adaDF["Close"].shift(-1)
adaDF.dropna(inplace=True)
adaDF

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day,TomorrowClose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,2.724210,2.724210,2.689770,2.699050,2.699050,770693,2015,1,1,2.667360
2015-01-02,2.697430,2.699240,2.663850,2.667360,2.667360,855392,2015,1,2,2.133160
2015-01-03,2.666460,2.666460,2.133160,2.133160,2.133160,5193080,2015,1,3,1.956680
2015-01-04,2.110220,2.154460,1.912410,1.956680,1.956680,3888140,2015,1,4,2.082180
2015-01-05,1.958030,2.160480,1.958030,2.082180,2.082180,10649500,2015,1,5,2.112120
...,...,...,...,...,...,...,...,...,...,...
2023-05-20,91.711212,92.682510,90.824493,92.371315,92.371315,306548115,2023,5,20,92.278870
2023-05-21,92.377602,93.296150,91.468597,92.278870,92.278870,365938794,2023,5,21,90.946075
2023-05-22,92.277184,92.659637,90.364304,90.946075,90.946075,396591248,2023,5,22,91.632332
2023-05-23,90.948753,92.988068,90.690895,91.632332,91.632332,384556801,2023,5,23,86.000366


In [6]:
adaDF.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3066 entries, 2015-01-01 to 2023-05-24
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           3066 non-null   float64
 1   High           3066 non-null   float64
 2   Low            3066 non-null   float64
 3   Close          3066 non-null   float64
 4   Adj Close      3066 non-null   float64
 5   Volume         3066 non-null   int64  
 6   year           3066 non-null   int64  
 7   month          3066 non-null   int64  
 8   day            3066 non-null   int64  
 9   TomorrowClose  3066 non-null   float64
dtypes: float64(6), int64(4)
memory usage: 263.5 KB


In [7]:
# Separate the data into labels and features

y = adaDF["TomorrowClose"]
X = adaDF.drop(columns=["TomorrowClose"])



In [8]:
# Review the y variable Series
y

Date
2015-01-01     2.667360
2015-01-02     2.133160
2015-01-03     1.956680
2015-01-04     2.082180
2015-01-05     2.112120
                ...    
2023-05-20    92.278870
2023-05-21    90.946075
2023-05-22    91.632332
2023-05-23    86.000366
2023-05-24    85.232765
Name: TomorrowClose, Length: 3066, dtype: float64

In [9]:
# Review the X variable DataFrame
X

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-01,2.724210,2.724210,2.689770,2.699050,2.699050,770693,2015,1,1
2015-01-02,2.697430,2.699240,2.663850,2.667360,2.667360,855392,2015,1,2
2015-01-03,2.666460,2.666460,2.133160,2.133160,2.133160,5193080,2015,1,3
2015-01-04,2.110220,2.154460,1.912410,1.956680,1.956680,3888140,2015,1,4
2015-01-05,1.958030,2.160480,1.958030,2.082180,2.082180,10649500,2015,1,5
...,...,...,...,...,...,...,...,...,...
2023-05-20,91.711212,92.682510,90.824493,92.371315,92.371315,306548115,2023,5,20
2023-05-21,92.377602,93.296150,91.468597,92.278870,92.278870,365938794,2023,5,21
2023-05-22,92.277184,92.659637,90.364304,90.946075,90.946075,396591248,2023,5,22
2023-05-23,90.948753,92.988068,90.690895,91.632332,91.632332,384556801,2023,5,23


In [10]:
# Check the balance of our target values
y.value_counts()

3.786810     2
1.786460     2
2.844650     2
2.667360     1
57.741318    1
            ..
56.494301    1
55.737999    1
55.332401    1
54.693901    1
85.232765    1
Name: TomorrowClose, Length: 3063, dtype: int64

In [11]:
# Import the train_test_learn module
from sklearn.model_selection import train_test_split

# Split the data using train_test_split
# Assign a random_state of 1 to the function

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    )


In [12]:
from sklearn.linear_model import LinearRegression

# Instantiate the Linear Regression model
regressor = LinearRegression()

# Fit the model using training data
regressor.fit(X_train, y_train)

# Predict the future values using test data
y_pred = regressor.predict(X_test)

In [13]:
X_train

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,year,month,day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-04-04,133.908005,134.610001,116.583000,118.412003,118.412003,380351008,2018,4,4
2016-04-21,3.280930,3.351760,3.273470,3.346400,3.346400,1688230,2016,4,21
2018-05-09,159.617004,159.617004,152.598007,157.054993,157.054993,448136000,2018,5,9
2017-06-28,40.816002,43.613701,39.177700,42.849499,42.849499,353444000,2017,6,28
2020-03-10,50.479828,51.252319,49.395454,50.256390,50.256390,3945792927,2020,3,10
...,...,...,...,...,...,...,...,...,...
2022-07-26,54.353600,54.353600,52.195511,53.790173,53.790173,400322025,2022,7,26
2017-06-24,47.246700,47.443298,44.011600,44.491798,44.491798,321214016,2017,6,24
2018-01-01,231.666000,236.634003,222.203003,229.033005,229.033005,633142016,2018,1,1
2015-08-24,3.392580,3.392580,2.966110,2.994740,2.994740,7278630,2015,8,24


In [14]:
# Evaluate the model's performance
from sklearn.metrics import mean_squared_error, r2_score

# Calculate the mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the coefficient of determination (R^2 score)
r2 = r2_score(y_test, y_pred)
print("R^2 Score:", r2)




Mean Squared Error: 31.21858326886523
R^2 Score: 0.9918190093314659


In [15]:
print("Litecoin (LTC)")
highest_value = adaDF['High'].max()
highest_date = adaDF.loc[adaDF['High'] == highest_value, ['month', 'day', 'year']].astype(str).agg('/'.join, axis=1)
rounded_highest_value = round(highest_value, 2)
print("Highest Value: $", rounded_highest_value, "on", highest_date.iloc[0])

lowest_value = adaDF['High'].min()
lowest_date = adaDF.loc[adaDF['High'] == lowest_value, ['month', 'day', 'year']].astype(str).agg('/'.join, axis=1)
rounded_lowest_value = round(lowest_value, 2)
print("Lowest Value: $", rounded_lowest_value, "on", lowest_date.iloc[0])


Litecoin (LTC)
Highest Value: $ 412.96 on 5/10/2021
Lowest Value: $ 1.34 on 1/20/2015


In [16]:
import statsmodels.api as sm

model = sm.OLS(y_train,X_train)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          TomorrowClose   R-squared:                       0.990
Model:                            OLS   Adj. R-squared:                  0.990
Method:                 Least Squares   F-statistic:                 3.204e+04
Date:                Fri, 02 Jun 2023   Prob (F-statistic):               0.00
Time:                        16:56:56   Log-Likelihood:                -7662.8
No. Observations:                2299   AIC:                         1.534e+04
Df Residuals:                    2291   BIC:                         1.539e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Open           0.0425      0.041      1.049      0.2

In [17]:
y_test

Date
2017-06-25    41.131802
2015-01-07     2.008070
2019-02-18    47.864677
2015-02-15     1.796280
2020-01-28    60.077934
                ...    
2022-09-28    53.914047
2016-07-09     4.099830
2015-07-06     5.234630
2015-06-16     2.888620
2017-10-17    60.730801
Name: TomorrowClose, Length: 767, dtype: float64

In [18]:
y_pred

array([ 43.51245209,   2.24580501,  48.6271892 ,   1.83409011,
        60.84276123,  76.15816995,  46.0731115 ,  92.17333509,
         2.16547817,   4.1948799 ,   3.94787383,   3.20683584,
        44.52889325,  51.74608021,  56.83904746,   4.70057022,
         3.66516709,  42.0121582 ,  82.26695417,  98.54279348,
        60.04608031, 219.1132664 , 118.40765276,  90.72835558,
       154.89752073,  69.05611357, 145.83903882,  61.27770216,
         2.92583507,   3.15657255,  72.16127078,  56.90281195,
        77.99999467,  36.26742397,   4.44266467, 110.93864844,
       278.21582294,  90.90735634,   1.65074965, 143.32633896,
        68.01733301, 112.52413631,   4.06073459,  42.81529849,
        54.46806041,   2.92638423,   4.53923795,  48.34722577,
       144.14857649,  58.60154663,  33.69085876,   4.38938756,
        79.08214159,  55.68917788,  41.41133056, 224.87516496,
         2.26881537,  96.89823526,  53.17163083,  57.40080423,
       219.73315243,  45.14504292, 102.05396871,  41.95

In [19]:
# Make a prediction using the testing data

predictions = regressor.predict(X_test)
predictions

array([ 43.51245209,   2.24580501,  48.6271892 ,   1.83409011,
        60.84276123,  76.15816995,  46.0731115 ,  92.17333509,
         2.16547817,   4.1948799 ,   3.94787383,   3.20683584,
        44.52889325,  51.74608021,  56.83904746,   4.70057022,
         3.66516709,  42.0121582 ,  82.26695417,  98.54279348,
        60.04608031, 219.1132664 , 118.40765276,  90.72835558,
       154.89752073,  69.05611357, 145.83903882,  61.27770216,
         2.92583507,   3.15657255,  72.16127078,  56.90281195,
        77.99999467,  36.26742397,   4.44266467, 110.93864844,
       278.21582294,  90.90735634,   1.65074965, 143.32633896,
        68.01733301, 112.52413631,   4.06073459,  42.81529849,
        54.46806041,   2.92638423,   4.53923795,  48.34722577,
       144.14857649,  58.60154663,  33.69085876,   4.38938756,
        79.08214159,  55.68917788,  41.41133056, 224.87516496,
         2.26881537,  96.89823526,  53.17163083,  57.40080423,
       219.73315243,  45.14504292, 102.05396871,  41.95