In [293]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [294]:
ticker = 'AMZN'

data = yf.download(ticker, start="2022-05-01", end="2024-05-27")

data.reset_index(inplace=True)

data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-05-02,122.401001,124.667999,118.375000,124.500000,124.500000,148788000
1,2022-05-03,124.053497,126.220497,122.824997,124.253502,124.253502,79134000
2,2022-05-04,123.599998,126.000000,119.182999,125.928497,125.928497,110746000
3,2022-05-05,123.000000,123.499001,115.072502,116.406998,116.406998,144392000
4,2022-05-06,114.849998,119.050499,113.081497,114.772499,114.772499,124260000
...,...,...,...,...,...,...,...
515,2024-05-20,184.339996,186.669998,183.279999,183.539993,183.539993,30511800
516,2024-05-21,182.300003,183.259995,180.750000,183.149994,183.149994,50839100
517,2024-05-22,183.880005,185.220001,181.970001,183.130005,183.130005,28148800
518,2024-05-23,183.660004,184.759995,180.080002,181.050003,181.050003,33670200


In [295]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520 entries, 0 to 519
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       520 non-null    datetime64[ns]
 1   Open       520 non-null    float64       
 2   High       520 non-null    float64       
 3   Low        520 non-null    float64       
 4   Close      520 non-null    float64       
 5   Adj Close  520 non-null    float64       
 6   Volume     520 non-null    int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 28.6 KB


In [296]:
X = data[['High','Low','Close','Volume']]
y = data['Open']

In [297]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2, random_state=42)

In [298]:
model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

In [299]:
y_pred = model.predict(X_test)
y_pred

array([127.21949806, 127.79764854, 110.09440197,  83.56065186,
       132.08999908, 184.23850372, 129.15139668,  99.4508989 ,
       102.64750088, 182.3776474 ,  93.87605061,  93.96459984,
       129.79540085, 187.88610359, 111.24241016, 140.37225143,
       125.03700047, 103.58192421, 176.09169975, 100.43462482,
        89.05704998, 129.3919474 , 115.74429947,  85.0066494 ,
       139.65275185, 132.16259933, 134.56104973, 169.08665222,
       188.20110268, 104.00399883, 107.65954525, 138.27729904,
       134.27115227, 110.16187218, 123.71761074, 137.64149925,
       153.61789871,  95.59450077, 145.71974998, 132.02610016,
       129.54880104, 137.74535042, 123.00736652, 184.35535217,
       181.92169861,  88.41499802,  99.87930031, 105.51892639,
        89.95820019,  94.48395092,  93.62755009, 127.96960014,
       139.81020256, 173.67049988, 130.5075531 ,  96.1574007 ,
       173.45630264, 144.00510124, 123.59212738, 134.36800034,
       144.04275253, 181.93710007, 184.44355125, 127.18

In [300]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f'MSE: {mse}')
print(f'MAE: {mae}')

MSE: 1.9721198374142805
MAE: 1.1284927151753352


In [301]:
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
comparison

Unnamed: 0,Actual,Predicted
275,125.070000,127.219498
93,127.360001,127.797649
6,111.250000,110.094402
167,82.870003,83.560652
90,130.910004,132.089999
...,...,...
69,142.899994,141.514652
357,128.059998,125.596900
423,146.740005,147.949598
334,134.929993,134.613501


In [302]:
future_data = {
    'High': [data['High'].iloc[-1]],
    'Low': [data['Low'].iloc[-1]],
    'Close': [data['Close'].iloc[-1]],
    'Volume': [data['Volume'].iloc[-1]]
}

future_df = pd.DataFrame(future_data)
future_pred = model.predict(future_df)

print(f'Dự đoán giá Close trong tương lai: {future_pred[0]}')


Dự đoán giá Close trong tương lai: 181.92169860839843
