In [None]:
import yfinance as yf
import pandas as pd

def get_stock_data(ticker, start_date, end_date):
    """
    Fetches stock data for the given ticker symbol within the specified time period.

    Recent yfinance releases return two-level columns (price field, ticker)
    even when a single symbol is requested. When the last column level holds
    exactly one distinct value it is dropped, so callers can keep using plain
    names such as ``df['Close']`` and get a Series back.

    Parameters:
        ticker (str): Ticker symbol of the stock.
        start_date (str): Start date in the format 'YYYY-MM-DD'.
        end_date (str): End date in the format 'YYYY-MM-DD'.

    Returns:
        DataFrame: Stock data for the specified time period. Frames that
        already have flat columns, or that cover several tickers, are
        returned unchanged.
    """
    stock_data = yf.download(ticker, start=start_date, end=end_date)

    columns = stock_data.columns
    # Only collapse when the trailing level is redundant (one symbol); with
    # several symbols dropping it would create duplicate column names.
    if isinstance(columns, pd.MultiIndex) and columns.get_level_values(-1).nunique() == 1:
        stock_data = stock_data.droplevel(-1, axis=1)

    return stock_data

# Date window for the download, both bounds in 'YYYY-MM-DD' form
start_date = '2010-01-01'
end_date = '2024-01-01'

# Pull the BMW ('BMW.DE') history for that window into `df`
df = get_stock_data('BMW.DE', start_date=start_date, end_date=end_date)

# Render the downloaded frame
display(df)

In [None]:
# Line chart of the 'Close' column plotted against the frame's index.
df.plot.line(y='Close')

In [None]:
# 'Tomorrow' holds the next row's 'Close' (values shifted up by one row);
# the last row has no successor, so its 'Tomorrow' is NaN.
df['Tomorrow'] = df['Close'].shift(-1)

In [None]:
# Binary label: 1 when the next row's close is above this row's close, else 0.
# The final row's 'Tomorrow' is NaN, so the comparison is False there and that
# row is labelled 0 even though its real outcome is unknown.
df['Target'] = (df['Tomorrow'] > df['Close']).astype(int)
df

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Columns the classifier is allowed to look at.
predictors = ['Close', 'Volume', 'Open', 'High', 'Low']

# The 100 most recent rows are held out; everything before them is training data.
test = df.iloc[-100:]
train = df.iloc[:-100]

# Fixed random_state keeps the forest reproducible between runs.
model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=1)
model.fit(train[predictors], train['Target'])

In [None]:
import pandas as pd
from sklearn.metrics import precision_score

# Predict the held-out rows and index the labels like the test frame so the
# two can be compared and concatenated row by row.
preds = pd.Series(model.predict(test[predictors]), index=test.index)

In [None]:
# Precision on the held-out rows: of the rows predicted 1, the share whose Target is 1.
precision_score(test['Target'], preds)

In [None]:
# Place the true labels and the model's predictions side by side, then chart both.
combined = pd.concat(objs=[test['Target'], preds], axis='columns')
combined.plot()

In [None]:
def backtest(data, model, predictors, start=2500, step=250):
    """
    Walk-forward evaluation over `data`.

    For each fold the model is trained on rows ``[0, i)`` and evaluated on
    rows ``[i, i + step)``, with ``i`` running from `start` to the end of the
    frame in increments of `step`. Fitting and predicting are delegated to the
    notebook-level ``predict(train, test, predictors, model)``.

    Parameters:
        data (DataFrame): Rows in the order they should be walked through.
        model: Estimator handed unchanged to ``predict``.
        predictors (list): Feature column names handed to ``predict``.
        start (int): Number of leading rows used for training in the first fold.
        step (int): Number of rows in each test fold.

    Returns:
        DataFrame: The per-fold frames returned by ``predict``, concatenated
        in fold order.

    Raises:
        ValueError: If `step` is not positive, or `data` has no more than
            `start` rows (no test fold could be formed).
    """
    total_rows = data.shape[0]
    if step <= 0:
        raise ValueError(f"step must be a positive number of rows, got step={step}")
    if total_rows <= start:
        # Without this check the loop below never runs and pd.concat([]) fails
        # with the unhelpful "No objects to concatenate".
        raise ValueError(
            f"backtest needs more than start={start} rows to form a test fold, "
            f"but data has only {total_rows}"
        )

    all_predictions = []
    for i in range(start, total_rows, step):
        # Copies keep each fold independent of the caller's frame.
        train = data.iloc[:i].copy()
        test = data.iloc[i:i + step].copy()
        all_predictions.append(predict(train, test, predictors, model))

    return pd.concat(all_predictions)

In [None]:
def predict(train, test, predictors, model):
    """
    Fit `model` on the training rows and score the test rows.

    Parameters:
        train (DataFrame): Training rows; must contain `predictors` and "Target".
        test (DataFrame): Rows to predict; must contain `predictors` and "Target".
        predictors (list): Feature column names.
        model: Estimator exposing ``fit`` and ``predict``.

    Returns:
        DataFrame: Indexed like `test`, with the true "Target" column next to
        a "Predictions" column holding the model's output.
    """
    train_features, train_labels = train[predictors], train["Target"]
    model.fit(train_features, train_labels)

    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Target"], predicted], axis=1)

In [None]:

# Walk-forward backtest with the raw price/volume predictors, then count how
# often each class was predicted.
predictions = backtest(df, model, predictors)
predictions["Predictions"].value_counts()

In [None]:
# Precision over the concatenated backtest folds.
precision_score(predictions["Target"], predictions["Predictions"])


In [None]:
# Baseline to compare the precision against: the share of each Target class
# among the backtested rows.
predictions["Target"].value_counts(normalize=True)


In [None]:
# Rolling-window lengths, in rows, for the engineered features.
horizons = [2,5,60,250,1000]
new_predictors = []

# Only these two columns feed the features, so roll over them directly instead
# of over the whole (growing) frame on every horizon.
close = df["Close"]
# Shifted by one row so a row's trend only counts labels from earlier rows,
# never its own Target.
prior_targets = df["Target"].shift(1)

for horizon in horizons:
    # Close relative to its rolling mean over the last `horizon` rows (current row included).
    ratio_column = f"Close_Ratio_{horizon}"
    df[ratio_column] = close / close.rolling(horizon).mean()

    # Number of rows labelled 1 among the `horizon` rows preceding the current one.
    trend_column = f"Trend_{horizon}"
    df[trend_column] = prior_targets.rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

In [None]:
# Discard rows with a missing value in any column other than "Tomorrow";
# a row whose only gap is "Tomorrow" is kept.
required_columns = [name for name in df.columns if name != "Tomorrow"]
df = df.dropna(subset=required_columns)
df

In [None]:
# Rebinds `model` to a new forest with 200 trees and min_samples_split=50;
# random_state is fixed so repeated runs give the same forest.
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)
def predict(train, test, predictors, model, threshold=0.6):
    """
    Fit `model` on the training rows and flag test rows by predicted probability.

    This definition replaces the earlier ``predict`` in the notebook: instead
    of the model's hard class decision it uses the probability in column 1 of
    ``predict_proba`` and flags a row only when that probability reaches
    `threshold`.

    Parameters:
        train (DataFrame): Training rows; must contain `predictors` and "Target".
        test (DataFrame): Rows to predict; must contain `predictors` and "Target".
        predictors (list): Feature column names.
        model: Estimator exposing ``fit`` and ``predict_proba``.
        threshold (float): Minimum probability for a row to be flagged 1.
            Defaults to 0.6, the value previously hard-coded here.

    Returns:
        DataFrame: Indexed like `test`, with the true "Target" column next to
        a float "Predictions" column containing 1.0 or 0.0.
    """
    model.fit(train[predictors], train["Target"])

    # Column 1 of predict_proba; assumes the model saw two classes during
    # fitting so that this column exists.
    positive_probability = model.predict_proba(test[predictors])[:, 1]

    # One vectorised comparison; cast to float so the column keeps the same
    # 1.0/0.0 values the in-place overwrite used to produce.
    flags = (positive_probability >= threshold).astype(float)

    preds = pd.Series(flags, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)
# Re-run the backtest using only the rolling ratio/trend columns collected in
# `new_predictors`, then count how often each class was predicted.
predictions = backtest(df, model, new_predictors)
predictions["Predictions"].value_counts()

In [None]:
# Precision over the concatenated backtest folds of the latest `predictions`.
precision_score(predictions["Target"], predictions["Predictions"])


In [None]:
# A cell only renders its last expression, so the class shares are displayed
# explicitly; otherwise they would be computed and thrown away.
display(predictions["Target"].value_counts(normalize=True))
predictions

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# NOTE(review): no visible cell uses `label_encoder`; confirm it is still needed.
label_encoder = LabelEncoder()
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers, Sequential


In [None]:
# Predictors are every column except the label and the next-row close it is derived from.
X_train = df.drop(["Tomorrow", "Target"], axis=1)
Y_train = df["Target"]

# Chronological hold-out: the last 20% of rows form the test set. A shuffled
# split would put rows from the same rolling windows on both sides of the
# split (look-ahead leakage) and would change on every run.
x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.20, shuffle=False)

# Prices, volumes and ratios live on very different scales; standardise them
# with statistics learned from the training rows only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train.to_numpy(dtype="float64"))
x_test = scaler.transform(x_test.to_numpy(dtype="float64"))

# Each predictor column is treated as one timestep carrying a single feature.
timesteps = x_train.shape[1]  # Number of timesteps
features = 1  # Number of features per timestep

# Give the recurrent layers the 3-D (samples, timesteps, features) input they
# are declared with, rather than relying on implicit rank adjustment.
x_train = x_train.reshape(-1, timesteps, features)
x_test = x_test.reshape(-1, timesteps, features)

# Model architecture: alternating bidirectional LSTM / GRU layers, each
# followed by dropout; only the last recurrent layer collapses the sequence.
N_RECURRENT_LAYERS = 8
RECURRENT_UNITS = 400
DROPOUT_RATE = 0.2

recurrent_stack = []
for depth in range(N_RECURRENT_LAYERS):
    cell_type = layers.LSTM if depth % 2 == 0 else layers.GRU
    is_last = depth == N_RECURRENT_LAYERS - 1
    recurrent_stack.append(
        layers.Bidirectional(cell_type(RECURRENT_UNITS, return_sequences=not is_last))
    )
    recurrent_stack.append(layers.Dropout(DROPOUT_RATE))  # Dropout to limit overfitting

model = Sequential([
    layers.Input(shape=(timesteps, features)),  # Input shape with timesteps and features
    *recurrent_stack,
    layers.Dense(1, activation='sigmoid')  # Output layer
])

# Define optimizer and compile the model
learning_rate = 0.00001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Define Early Stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with Early Stopping. validation_split takes the last 10% of
# the training rows, which keeps the validation set later in time than the
# rows used for fitting.
history = model.fit(x_train, y_train, epochs=30, batch_size=32, validation_split=0.1, callbacks=[early_stopping])

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_accuracy)

In [None]:

# Evaluate the model on test data
# NOTE(review): repeats the evaluate/print that already ends the previous
# cell, on the same `model`, `x_test` and `y_test`.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_accuracy)

In [None]:


# This cell works on the M-En anomaly dataset, not on the stock frame: it
# needs a 'target' column holding 'Normal' / 'Anomaly'. The stock `df` has no
# such column, so the dataset is loaded under its own name and `df` is left
# untouched.
# NOTE(review): the URL comes from the previously commented-out line; confirm
# it is still reachable and that the schema matches.
anomaly_df = pd.read_csv('https://raw.githubusercontent.com/umar-farooq-khan/m-en-dataset/main/M-En%20Dataset.csv')

# Encode the labels explicitly and fail loudly on anything unexpected rather
# than passing stray strings on to Keras.
label_codes = anomaly_df['target'].map({'Normal': 0, 'Anomaly': 1})
if label_codes.isna().any():
    raise ValueError("'target' contains values other than 'Normal' and 'Anomaly'")
y = label_codes.astype(int)

# Drop the label and the first column of the file from the predictors.
X = anomaly_df.drop(['target', anomaly_df.columns[0]], axis=1)

# Fixed random_state so the split, and therefore the reported accuracy, is repeatable.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

from tensorflow.keras import layers, Sequential

# Each predictor column is treated as one timestep carrying a single feature.
timesteps = x_train.shape[1]  # Number of timesteps
features = x_train.shape[2] if len(x_train.shape) > 2 else 1  # Number of features per timestep

# Give the recurrent layers the 3-D (samples, timesteps, features) input they
# are declared with. Assumes every remaining column is numeric - TODO confirm.
x_train = x_train.to_numpy(dtype="float32").reshape(-1, timesteps, features)
x_test = x_test.to_numpy(dtype="float32").reshape(-1, timesteps, features)

# Model architecture: alternating bidirectional LSTM / GRU layers, each
# followed by dropout; only the last recurrent layer collapses the sequence.
N_RECURRENT_LAYERS = 8
RECURRENT_UNITS = 400
DROPOUT_RATE = 0.2

recurrent_stack = []
for depth in range(N_RECURRENT_LAYERS):
    cell_type = layers.LSTM if depth % 2 == 0 else layers.GRU
    is_last = depth == N_RECURRENT_LAYERS - 1
    recurrent_stack.append(
        layers.Bidirectional(cell_type(RECURRENT_UNITS, return_sequences=not is_last))
    )
    recurrent_stack.append(layers.Dropout(DROPOUT_RATE))  # Dropout to limit overfitting

model = Sequential([
    layers.Input(shape=(timesteps, features)),  # Input shape with timesteps and features
    *recurrent_stack,
    layers.Dense(1, activation='sigmoid')  # Output layer
])

# Define optimizer and compile the model
learning_rate = 0.00001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Define Early Stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with Early Stopping
history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_split=0.1, callbacks=[early_stopping])

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_accuracy)