In [1]:
# 0) Chargement des bibliothèques + seed
# --------------------------------------

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Pour la régression linéaire
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Pour le réseau de neurones (Keras)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Fixation du seed pour la reproductibilité
SEED = 123
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("Libraries loaded and random seed fixed.")


# 1) Lecture des données (depuis l'URL) et prétraitement
# ------------------------------------------------------

# URL des données
url = "https://turinici.com/wp-content/uploads/cours/common/close_cac40_historical.csv"

# Lecture du fichier CSV en ne chargeant que la colonne 'BNP.PA'
df_raw = pd.read_csv(url, sep=";", usecols=["Date", "BNP.PA"])

# Renommage des colonnes pour plus de clarté
df_raw.rename(columns={"BNP.PA": "Price"}, inplace=True)

# Conversion de la colonne 'Date' en format datetime (si ce n'est pas déjà le cas)
df_raw['Date'] = pd.to_datetime(df_raw['Date'])

# Définir la colonne 'Date' comme index
df_raw.set_index("Date", inplace=True)

# Affichage des premières lignes et dimensions
print("\nAperçu des premières lignes :")
print(df_raw.head())
print("\nDimensions initiales :", df_raw.shape)

# Retirer les lignes contenant des NaN dans la colonne 'Price'
df_raw.dropna(subset=["Price"], inplace=True)

# Tri par index (date) pour s'assurer de l'ordre chronologique
df_raw.sort_index(inplace=True)

# Vérification après nettoyage
print("\nAperçu après nettoyage :")
print(df_raw.head())
print("\nDimensions après nettoyage :", df_raw.shape)





Libraries loaded and random seed fixed.

Aperçu des premières lignes :
                Price
Date                 
2000-01-03  16.165972
2000-01-04  15.711578
2000-01-05  15.379528
2000-01-06  14.855226
2000-01-07  15.047465

Dimensions initiales : (8605, 1)

Aperçu après nettoyage :
               Price
Date                
1993-10-18  4.227787
1993-10-19  4.120383
1993-10-20  4.149676
1993-10-21  4.218023
1993-10-22  4.315662

Dimensions après nettoyage : (7339, 1)


In [69]:
df = df_raw.copy()


In [70]:
df['returns']= np.log(df.Price / df.Price.shift(1))
df['returns'].dropna(axis= 0, inplace=True)
windows = [5,25,75,150,255,3*255]
for w in windows:
    df[f'rolling_mean_{w}_days'] = df['returns'].rolling(w).mean()

In [71]:
df.dropna(axis = 0, inplace = True )

In [79]:
df_returns = df[df.columns[1:]]
df_returns

Unnamed: 0_level_0,returns,rolling_mean_5_days,rolling_mean_25_days,rolling_mean_75_days,rolling_mean_150_days,rolling_mean_255_days,rolling_mean_765_days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1996-09-23,-0.006993,-5.536723e-03,0.003519,0.001615,0.000738,0.000394,-0.000150
1996-09-24,0.017391,3.053113e-17,0.003156,0.001663,0.000786,0.000502,-0.000094
1996-09-25,0.013699,4.819498e-03,0.004154,0.000508,0.000720,0.000489,-0.000085
1996-09-26,-0.003407,2.058424e-03,0.002391,0.000463,0.000787,0.000436,-0.000111
1996-09-27,0.003407,4.819498e-03,0.001809,0.000556,0.000610,0.000437,-0.000137
...,...,...,...,...,...,...,...
2022-04-14,0.011785,1.226637e-02,0.000129,-0.002712,-0.000414,0.000095,0.000380
2022-04-19,0.004836,7.788658e-03,0.000579,-0.002669,-0.000305,0.000127,0.000383
2022-04-20,0.029415,9.944537e-03,0.000172,-0.002371,0.000196,0.000136,0.000400
2022-04-21,0.022581,1.460446e-02,0.000856,-0.002509,0.000319,0.000228,0.000436


In [80]:
df_returns.loc['1996-09-23']=100
df_returns.cumprod(axis=0)

Unnamed: 0_level_0,returns,rolling_mean_5_days,rolling_mean_25_days,rolling_mean_75_days,rolling_mean_150_days,rolling_mean_255_days,rolling_mean_765_days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1996-09-23,1.000000e+02,1.000000e+02,1.000000e+02,1.000000e+02,1.000000e+02,1.000000e+02,1.000000e+02
1996-09-24,1.739149e+00,3.053113e-15,3.155745e-01,1.662709e-01,7.858485e-02,5.024862e-02,-9.408654e-03
1996-09-25,2.382477e-02,1.471447e-17,1.310897e-03,8.453904e-05,5.660511e-05,2.459158e-05,8.038727e-07
1996-09-26,-8.117036e-05,3.028862e-20,3.134202e-06,3.914287e-08,4.453581e-08,1.073405e-08,-8.942914e-11
1996-09-27,-2.765453e-07,1.459759e-22,5.669823e-09,2.174922e-11,2.718795e-11,4.688200e-12,1.222569e-14
...,...,...,...,...,...,...,...
2022-04-14,0.000000e+00,0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,0.000000e+00,-0.000000e+00
2022-04-19,0.000000e+00,0.000000e+00,-0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,-0.000000e+00
2022-04-20,0.000000e+00,0.000000e+00,-0.000000e+00,-0.000000e+00,0.000000e+00,0.000000e+00,-0.000000e+00
2022-04-21,0.000000e+00,0.000000e+00,-0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,-0.000000e+00
