Preparing variables for my data table

Feature Variable List:

eur_jpy_wc
eur_jpy_wc

gbp_jpy_MA5
gbp_jpy_MA20

eur_gbp_MA5
eur_gbp_MA20

gbp_jpy_rsi
eur_gbp_rsi

gbp_jpy_ema12
eur_gbp_ema12

gbp_jpy_data_20_SD
gbp_jpy_upperbound
gbp_jpy_lowerbound
eur_gbp_data_20_SD
eur_gbp_upperbound
eur_gbp_lowerbound

r_gbp_lowerbound

gbp_jpy_26_EMA
eur_gbp_26_EMA

gbp_jpy_SO
eur_gbp_SO

gbp_jpy_MACD
eur_gbp_MACD



In [2]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import csv

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor  # Example model
from sklearn.metrics import mean_squared_error


import schedule
import time
from datetime import datetime

# Download EUR/GBP data for the past 6 months with hourly intervals
eur_gbp_data = yf.download("EURGBP=X", interval="1h", period="6mo")

# Download GBP/JPY data for the past 6 months with hourly intervals
gbp_jpy_data = yf.download("EURUSD=X", interval="1h", period="6mo")



gbp_usd_data = yf.download("GBPUSD=X", interval="1h", period="6mo")

gbp_usd_data['GBP/USD_2hr'] = gbp_usd_data['Close'].shift(+1)

gbp_jpy_data['GBP/USD_2hr'] = gbp_usd_data['GBP/USD_2hr']



#RSI
def calculate_rsi(data, window=14):
    delta = data['Close'].diff()  # Calculate the difference in 'Close' prices
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()  # Calculate rolling mean of gains
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()  # Calculate rolling mean of losses
    
    # To avoid division by zero, replace 0 losses with a small value
    loss = loss.replace(0, 1e-10)
    
    rs = gain / loss  # Calculate the relative strength (RS)
    rsi = 100 - (100 / (1 + rs))  # Compute RSI
    
    return rsi

# Apply the function to your dataset
gbp_jpy_data['GBPJPY_RSI'] = calculate_rsi(gbp_jpy_data)

gbp_jpy_data['GBPUSD_RSI'] = calculate_rsi(gbp_usd_data)



#ATR

def calculate_atr(data, window=14):
    # Calculate the True Range (TR)
    high_low = data['High'] - data['Low']
    high_close = (data['High'] - data['Close'].shift()).abs()
    low_close = (data['Low'] - data['Close'].shift()).abs()
    
    true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
    
    # Calculate the Average True Range (ATR)
    atr = true_range.rolling(window=window).mean()
    
    return atr

# Apply the ATR function to the GBP/JPY dataset
gbp_jpy_data['GBPJPY_ATR'] = calculate_atr(gbp_jpy_data)


def calculate_sma(data, window=14):
    """
    Calculate Simple Moving Average (SMA).
    """
    sma = data['Close'].rolling(window=window).mean()  # Calculate the rolling mean
    return sma

def calculate_ema(data, window=14):
    """
    Calculate Exponential Moving Average (EMA).
    """
    ema = data['Close'].ewm(span=window, adjust=False).mean()  # Calculate the exponentially weighted mean
    return ema

# Add SMA and EMA columns to your dataframe
gbp_jpy_data['GBPJPY_SMA_14'] = calculate_sma(gbp_jpy_data, window=14)  # 14-period SMA
gbp_jpy_data['GBPJPY_EMA_14'] = calculate_ema(gbp_jpy_data, window=14)  # 14-period EMA

eur_gbp_data['EURGBP_RSI'] = calculate_rsi(eur_gbp_data)
eur_gbp_data['EURGBP_ATR'] = calculate_atr(eur_gbp_data)
eur_gbp_data['EURGBP_SMA'] = calculate_sma(eur_gbp_data)
eur_gbp_data['EURGBP_EMA'] = calculate_ema(eur_gbp_data)

# Add EUR/GBP 'Close', RSI, ATR, SMA, EMA to the GBP/JPY dataset
gbp_jpy_data['EURGBP_Close'] = eur_gbp_data['Close']
gbp_jpy_data['EURGBP_RSI'] = eur_gbp_data['EURGBP_RSI']
gbp_jpy_data['EURGBP_ATR'] = eur_gbp_data['EURGBP_ATR']
gbp_jpy_data['EURGBP_SMA'] = eur_gbp_data['EURGBP_SMA']
gbp_jpy_data['EURGBP_EMA'] = eur_gbp_data['EURGBP_EMA']


gbp_usd_data['GBPUSD_SMA'] = calculate_sma(gbp_usd_data)
gbp_usd_data['GBPUSD_EMA'] = calculate_ema(gbp_usd_data)

#gbp_jpy_data['GBPUSD_SMA'] = gbp_usd_data['GBPUSD_SMA']
#gbp_jpy_data['GBPUSD_EMA'] = gbp_usd_data['GBPUSD_EMA']


gbp_jpy_data["GBP/JPY_Close"] = gbp_jpy_data["Close"]

gbp_jpy_data = gbp_jpy_data.drop(columns=['Volume', 'Adj Close'])
gbp_jpy_data = gbp_jpy_data.dropna()

gbp_jpy_data_log_scaled = gbp_jpy_data.apply(lambda x: np.log1p(x) if np.issubdtype(x.dtype, np.number) else x)





X = gbp_jpy_data.drop(columns=['GBP/USD_2hr'])  # Features (all columns except the target)
y = gbp_jpy_data['GBP/USD_2hr']  # Target column

# Step 2: Split the data (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train the model (Example: Random Forest Regressor)
model = RandomForestRegressor()
model.fit(X_train, y_train)  # Train the model on the training set

# Step 4: Evaluate the model
y_pred = model.predict(X_test)  # Make predictions on the test set
mse = mean_squared_error(y_test, y_pred)  # Calculate Mean Squared Error

print(f"Mean Squared Error on the test set: {mse}")

# Optional: View some predictions vs actual values
comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison_df.head())


last_row = X.tail(1)  # Get the last row of features (without the target)

# Make a prediction for the last row
last_row_prediction = model.predict(last_row)



# Output the prediction
print(X.tail(1))

print(f'last_row_prediction: {last_row_prediction}')



importances = model.feature_importances_

# Display feature importance alongside feature names
feature_importances = pd.DataFrame({
    'feature': X.columns,
    'importance': importances
}).sort_values(by='importance', ascending=False)

print(feature_importances)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Mean Squared Error on the test set: 5.970902601170592e-07
                             Actual  Predicted
Datetime                                      
2024-10-30 02:00:00+00:00  1.301152   1.300738
2024-10-28 18:00:00+00:00  1.297522   1.297115
2024-10-09 18:00:00+01:00  1.306933   1.306942
2024-05-17 12:00:00+01:00  1.264878   1.265353
2024-10-25 12:00:00+01:00  1.298718   1.298583
                               Open      High       Low     Close  GBPJPY_RSI  \
Datetime                                                                        
2024-11-05 18:00:00+00:00  1.093255  1.093972  1.093255  1.093853   88.071199   

                           GBPUSD_RSI  GBPJPY_ATR  GBPJPY_SMA_14  \
Datetime                                                           
2024-11-05 18:00:00+00:00   83.522866    0.000951       1.090337   

                           GBPJPY_EMA_14  EURGBP_Close  EURGBP_RSI  \
Datetime                                                             
2024-11-05 18:00:00+00:0

In [2]:

data = {
    'gbp_jpy_wc': gbp_jpy_wc,
    'gbp_jpy_MA5': gbp_jpy_MA5,
    'gbp_jpy_MA20': gbp_jpy_MA20,
    'eur_gbp_MA5': eur_gbp_MA5,
    'eur_gbp_MA20': eur_gbp_MA20,
    'gbp_jpy_rsi': gbp_jpy_rsi,
    'eur_gbp_rsi': eur_gbp_rsi,
    'gbp_jpy_ema12': gbp_jpy_ema12,
    'eur_gbp_ema12': eur_gbp_ema12,
    'gbp_jpy_data_20_SD': gbp_jpy_data_20_SD,
    'gbp_jpy_upperbound': gbp_jpy_upperbound,
    'gbp_jpy_lowerbound': gbp_jpy_lowerbound,
    'eur_gbp_data_20_SD': eur_gbp_data_20_SD,
    'eur_gbp_upperbound': eur_gbp_upperbound,
    'eur_gbp_lowerbound': eur_gbp_lowerbound
}

# Create DataFrame
df = pd.DataFrame(data, index=gbp_jpy_wc.index)

print(df.head())

                           gbp_jpy_wc  gbp_jpy_MA5  gbp_jpy_MA20  eur_gbp_MA5  \
Datetime                                                                        
2024-04-30 09:00:00+01:00  196.726250          NaN           NaN          NaN   
2024-04-30 10:00:00+01:00  196.861752          NaN           NaN          NaN   
2024-04-30 11:00:00+01:00  196.824497          NaN           NaN          NaN   
2024-04-30 12:00:00+01:00  196.825500          NaN           NaN          NaN   
2024-04-30 13:00:00+01:00  196.965000   196.871402           NaN     0.854804   

                           eur_gbp_MA20  gbp_jpy_rsi  eur_gbp_rsi  \
Datetime                                                            
2024-04-30 09:00:00+01:00           NaN          NaN          NaN   
2024-04-30 10:00:00+01:00           NaN          NaN          NaN   
2024-04-30 11:00:00+01:00           NaN          NaN          NaN   
2024-04-30 12:00:00+01:00           NaN          NaN          NaN   
2024-04-30 13:00:0