# Prédiction du marché boursier

## Lire les données

In [2]:
import pandas as pd
from datetime import datetime

# Load the CSV, parse the 'Date' strings into datetime values, and order the
# rows chronologically (oldest first) in a single chained expression.
csv_path = 'C:/Users/yessm/Desktop/sphist.csv'
df = (
    pd.read_csv(csv_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values("Date", ascending=True)
)

# Preview the first rows of the sorted frame.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
16589,1950-01-03,16.66,16.66,16.66,16.66,1260000.0,16.66
16588,1950-01-04,16.85,16.85,16.85,16.85,1890000.0,16.85
16587,1950-01-05,16.93,16.93,16.93,16.93,2550000.0,16.93
16586,1950-01-06,16.98,16.98,16.98,16.98,2010000.0,16.98
16585,1950-01-09,17.08,17.08,17.08,17.08,2520000.0,17.08


## Générer des indicateurs

In [7]:
# Build the indicator columns used as model features.
# '5 Days Open' is the mean of 'Open' over a 5-row rolling window, shifted
# down by one row so that each row is described by the five rows before it
# and never by its own value.
df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean().shift(1)

# Calendar year, taken with the vectorised .dt accessor instead of a
# per-row Python lambda.
df['Year'] = df['Date'].dt.year

# One-hot encode the day of the week (0 = Monday). The dummy columns keep the
# integer weekday as their label. Dummy columns left over from an earlier run
# of this cell are dropped first, so re-running the cell does not append a
# second, duplicate set of weekday columns.
dow_df = pd.get_dummies(df['Date'].dt.weekday)
df = pd.concat([df.drop(dow_df.columns, axis=1, errors='ignore'), dow_df], axis=1)

df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close,5 Days Open,Year,0,1,2,3,4
16589,1950-01-03,16.66,16.66,16.66,16.66,1260000.0,16.66,,1950,0,1,0,0,0
16588,1950-01-04,16.85,16.85,16.85,16.85,1890000.0,16.85,,1950,0,0,1,0,0
16587,1950-01-05,16.93,16.93,16.93,16.93,2550000.0,16.93,,1950,0,0,0,1,0
16586,1950-01-06,16.98,16.98,16.98,16.98,2010000.0,16.98,,1950,0,0,0,0,1
16585,1950-01-09,17.08,17.08,17.08,17.08,2520000.0,17.08,,1950,1,0,0,0,0


## Séparer les données

In [8]:
# Keep only rows dated 1951-01-03 or later, then drop every row that still
# contains a missing value. dropna returns a new frame rather than modifying
# df, so its result has to be assigned back for the rows to actually go away.
df = df[df['Date'] >= datetime(year=1951, month=1, day=3)]
df = df.dropna(axis=0)

# Chronological split: rows before 2013-01-01 form the training set, rows on
# or after that date form the test set.
split_date = datetime(year=2013, month=1, day=1)
train = df[df['Date'] < split_date]
test = df[df['Date'] >= split_date]

## Faire des prédictions

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Model inputs: the lagged 5-row mean of 'Open', the year, and the weekday
# dummy columns, whose labels are the integers 0-4.
features = ['5 Days Open', 'Year', 0, 1, 2, 3, 4]

# The feature labels mix str and int. Recent scikit-learn releases refuse
# DataFrames whose column labels are not all strings, so the labels are cast
# to str on the frames handed to the estimator. Only the labels change; the
# values and the column order are untouched.
X_train = train[features].rename(columns=str)
X_test = test[features].rename(columns=str)

# Fit an ordinary least-squares model on the training rows and predict
# 'Close' for the test rows.
lr = LinearRegression()
lr.fit(X_train, train['Close'])
predictions = lr.predict(X_test)

# Mean absolute error between the actual and predicted 'Close' on the test rows.
mae = mean_absolute_error(test['Close'], predictions)

print(df.tail(1))
print(mae)

        Date         Open         High          Low        Close  \
0 2015-12-07  2090.419922  2090.419922  2066.780029  2077.070068   

         Volume    Adj Close  5 Days Open  Year  0  1  2  3  4  
0  4.043820e+09  2077.070068  2081.507959  2015  1  0  0  0  0  
19.28529021732864
