**Download data**

In [None]:
import requests

# Remote CSV to fetch and the local path it is saved to.
url = "https://github.com/vharatian/MLIntrodcutionWorkshop/raw/main/all_stocks_5yr.csv"
# NOTE(review): "/content" looks like the Google Colab working directory;
# adjust this path when running the notebook elsewhere.
file_address = "/content/data.csv"

# Without a timeout, requests waits indefinitely on a stalled connection and
# the cell never finishes.
response = requests.get(url, timeout=60)

if response.status_code == 200:
    # Write the raw bytes so the file on disk matches the download exactly.
    with open(file_address, 'wb') as file:
        file.write(response.content)
    print(f"File '{file_address}' downloaded successfully.")
else:
    print(f"Failed to download the file. Status code: {response.status_code}")

**Load data and filter for Apple (AAPL)**

In [None]:
import getpass
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Read the downloaded CSV and keep only the rows whose 'Name' is AAPL,
# then print a summary of the resulting frame.
df = pd.read_csv(file_address).loc[lambda frame: frame['Name'] == 'AAPL']
df.info()

In [None]:
# Work on the closing price only. The explicit copy makes df2 independent of
# df, so adding a column below cannot affect (or warn about) the source frame.
df2 = df[['close']].copy()

# Number of rows ahead that the models are asked to predict.
future_days = 100
# Target column: the close `future_days` rows later. The final `future_days`
# rows have no such value and therefore hold NaN.
df2['Prediction'] = df2['close'].shift(-future_days)

# Trim the NaN-target tail so features and targets line up row for row.
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
X = np.array(df2.drop(columns=['Prediction']))[:-future_days]
y = np.array(df2['Prediction'])[:-future_days]

**Split the data and create models**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso

# Hold out 20% of the rows for testing. A fixed random_state makes the shuffle,
# and with it every downstream fit and plot, reproducible across runs.
# NOTE(review): the split shuffles rows of a time series, so test rows are
# interleaved with training rows; confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model input for the forecast: the closes of the last `future_days` rows that
# still have a known target. `columns=` replaces the positional axis argument,
# which pandas 2.0 rejects.
x_future = df2.drop(columns=['Prediction'])[:-future_days]
x_future = x_future.tail(future_days)
x_future = np.array(x_future)

**Make predictions**

In [72]:
def draw_prediction(predictions):
    """Plot the closing-price series with model predictions over its tail.

    Reads the notebook globals ``df2`` and ``X``: the rows of ``df2`` from
    position ``X.shape[0]`` onward are used as the window the predictions
    are drawn against.

    Parameters
    ----------
    predictions : array-like
        One value per row of that window. It is assigned as a new column,
        so its length must match the window's row count.
    """
    # Copy the slice before adding a column: writing into a slice of df2
    # triggers pandas' SettingWithCopyWarning and depends on view/copy rules.
    valid = df2[X.shape[0]:].copy()
    valid['Predictions'] = predictions

    plt.figure(figsize=(16, 8))
    plt.title("Model")
    plt.xlabel('Days')
    plt.ylabel('Close Price USD ($)')
    # First line: the full close series. Next two lines: the window's actual
    # close and the predictions. The legend labels follow that order.
    plt.plot(df2['close'])
    plt.plot(valid[['close', 'Predictions']])
    plt.legend(["Original", "Valid", 'Predicted'])
    plt.show()

**Draw linear regression outcome**

In [None]:
# Fit a linear regression on the training split, predict the forecast window
# and plot the result.
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_prediction = lr.predict(x_future)

draw_prediction(lr_prediction)

**Improve the linear model**

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the single close-price feature into polynomial terms up to degree 3
# and fit a linear regression on the expanded training features.
poly_reg = PolynomialFeatures(degree = 3)
X_polynomial = poly_reg.fit_transform(x_train)
lr_pol = LinearRegression()
lr_pol.fit(X_polynomial, y_train)

# Reuse the transformer fitted on the training data: prediction inputs are
# only transformed, never used to refit the preprocessing step.
lr_pol_prediction = lr_pol.predict(poly_reg.transform(x_future))

draw_prediction(lr_pol_prediction)

**Draw decision tree outcome**

In [None]:
# Fit a decision-tree regressor with default settings on the training split,
# then predict the forecast window and plot it.
tree = DecisionTreeRegressor()
tree.fit(x_train, y_train)

tree_prediction = tree.predict(x_future)
draw_prediction(tree_prediction)

In [None]:
from datetime import datetime, timedelta

def get_historical_data(symbol, api_key, outputsize='full'):
    """Fetch daily price history for ``symbol`` from the Alpha Vantage API.

    Parameters
    ----------
    symbol : str
        Ticker sent to the API; also stored in the ``name`` column.
    api_key : str
        Key sent as the ``apikey`` query parameter.
    outputsize : str, default 'full'
        Passed through unchanged as the ``outputsize`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``date, open, high, low, close, volume, name`` with a default
        integer index, sorted by date with the oldest row first. ``None`` when
        the response status is not 200 or the payload has no
        ``"Time Series (Daily)"`` entry; a message is printed in both cases.
    """
    base_url = "https://www.alphavantage.co/query"
    api_params = {
        "function": "TIME_SERIES_DAILY",
        "symbol": symbol,
        "outputsize": outputsize,
        "apikey": api_key,
    }

    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(base_url, params=api_params, timeout=30)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    data = response.json()
    time_series_data = data.get("Time Series (Daily)")
    if not time_series_data:
        print("No data available.")
        # NOTE(review): Alpha Vantage appears to report rate limits and invalid
        # requests inside a 200 response under keys like these; confirm the
        # exact key names against the API documentation.
        reason = next(
            (data[key] for key in ("Error Message", "Note", "Information")
             if data.get(key)),
            None,
        )
        if reason:
            print(f"API response: {reason}")
        return None

    # API field name -> column name used in the rest of the notebook.
    column_names = {
        '1. open': 'open',
        '2. high': 'high',
        '3. low': 'low',
        '4. close': 'close',
        '5. volume': 'volume',
    }

    # The payload maps each date to its fields, so dates become rows after .T.
    df = pd.DataFrame(time_series_data).T
    df.index = pd.to_datetime(df.index)
    # Sort by date explicitly rather than assuming the API's ordering.
    df = df.sort_index()

    df = df[list(column_names)].rename(columns=column_names)

    # Values arrive as strings; convert them to numeric dtypes.
    df = df.astype({
        'open': 'float64',
        'high': 'float64',
        'low': 'float64',
        'close': 'float64',
        'volume': 'int64'
    })

    df['date'] = df.index
    df['name'] = symbol
    df = df[['date', 'open', 'high', 'low', 'close', 'volume', 'name']]

    # Replace the date index with a default integer index starting at 0.
    return df.reset_index(drop=True)

stock_symbol = 'GOOGL'
# Keep the key out of the notebook: read it from the environment, or prompt
# for it when the variable is not set.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY') or getpass.getpass('Alpha Vantage API key: ')
# NOTE(review): outputsize='full' asks the API for its full-length series, not
# a fixed number of days; confirm the exact range in the Alpha Vantage docs.
historical_data = get_historical_data(stock_symbol, api_key, outputsize='full')

# get_historical_data returns None on failure; stop here with a clear message
# instead of failing on .tail() with an AttributeError.
if historical_data is None:
    raise RuntimeError(f"Could not load historical data for {stock_symbol}.")

historical_data.tail()

In [None]:
# From here on `df` refers to the downloaded frame instead of the CSV-based
# one; print its summary.
df = historical_data
df.info()

In [None]:
# Closing-price history of the downloaded symbol, plotted against row position.
plt.figure(figsize=(16,8))
# Title follows the symbol that was actually fetched rather than a fixed name.
plt.title(stock_symbol)
plt.xlabel('Days')
plt.ylabel('Closing Price USD ($)')
plt.plot(historical_data['close'])
plt.show()

In [None]:
# Display the five most recent rows of the downloaded history.
historical_data.tail(n=5)