In [3]:
using Pkg
Pkg.activate("../")

[32m[1m  Activating[22m[39m project at `/workspaces/algo-trading-notes`


In [4]:
using YFinance
using Dates
using CSV
using RollingFunctions
using DataFrames
using StatsBase

In [5]:
# Remark:
# Understand the difference between filling in NaN and missing in Julia

function get_stock_prices(ticker::String, startdt::String, enddt::String, interval::String)
    data_ordered_dict = get_prices(ticker, startdt=startdt, enddt=enddt, interval=interval)
    df = DataFrame(data_ordered_dict)
    return df
end

get_stock_prices (generic function with 1 method)

In [11]:
# test API call for YFinance
data = get_stock_prices("AAPL", "2010-1-1", "2020-12-31", "1d")

Row,ticker,timestamp,open,high,low,close,adjclose,vol
Unnamed: 0_level_1,String,DateTime,Float64,Float64,Float64,Float64,Float64,Float64
1,AAPL,2010-01-04T14:30:00,6.46144,6.49383,6.42965,7.64321,6.479,4.18525e8
2,AAPL,2010-01-05T14:30:00,6.49686,6.52683,6.45599,7.65643,6.4902,5.10222e8
3,AAPL,2010-01-06T14:30:00,6.4902,6.51593,6.3803,7.53464,6.38696,4.68055e8
4,AAPL,2010-01-07T14:30:00,6.41058,6.41815,6.32884,7.52071,6.37516,4.04454e8
5,AAPL,2010-01-08T14:30:00,6.36668,6.41815,6.32914,7.57071,6.41754,3.79431e8
6,AAPL,2010-01-11T14:30:00,6.44237,6.44842,6.31067,7.50393,6.36093,3.91823e8
7,AAPL,2010-01-12T14:30:00,6.33307,6.35063,6.24922,7.41857,6.28857,5.03911e8
8,AAPL,2010-01-13T14:30:00,6.29312,6.38575,6.17898,7.52321,6.37728,5.13602e8
9,AAPL,2010-01-14T14:30:00,6.36093,6.37152,6.32793,7.47964,6.34034,3.66955e8
10,AAPL,2010-01-15T14:30:00,6.38575,6.40604,6.23257,7.35464,6.23438,5.03579e8


In [None]:
# create a column that calculates returns based on the column close
data[!,"return"] = vcat(fill(missing, 1), diff(data[!,"close"]) ./ data[!,"close"][1:end-1]);


In [None]:
data = dropmissing(data, [:SMA1, :SMA2])
data[!,"position"] = ifelse.(data[!,"SMA1"] .> data[!,"SMA2"], 1, -1);

In [None]:
first(data,5)

In [None]:
using Plots
plot(data[!, "position"], title = "EUR Price/SMA Crossover Strategy", label = "position")

In [None]:
data[!, "strategy"] = vcat(missing, data[1:end-1, "position"]) .* data[!, "return"];

In [None]:
data = dropmissing(data, [:strategy])

In [None]:
data[!, "cumulative_strategy"] = cumsum(data[!, "strategy"])
data[!, "cumulative_return"] = cumsum(data[!, "return"]);

In [None]:
strategy_performance = data[end,"cumulative_strategy"]
strategy_outperformance = strategy_performance - data[end,"cumulative_return"]

In [35]:
function backtest_strategy(data, param1, param2)
    df = copy(data)
    df[!, "SMA1"] = vcat(fill(missing, param1 - 1), rolling(mean, df[!, "close"], param1))
    df[!, "SMA2"] = vcat(fill(missing, param2 - 1), rolling(mean, df[!, "close"], param2))
    df[!,"return"] = vcat(fill(missing, 1), diff(df[!,"close"]) ./ df[!,"close"][1:end-1]);
    df = dropmissing(df, [:SMA1, :SMA2])
    df[!,"position"] = ifelse.(df[!,"SMA1"] .> df[!,"SMA2"], 1, -1);
    df[!, "strategy"] = vcat(missing, df[1:end-1, "position"]) .* df[!, "return"];
    df = dropmissing(df, [:strategy])
    df[!, "cumulative_strategy"] = cumsum(df[!, "strategy"])
    df[!, "cumulative_return"] = cumsum(df[!, "return"]);
    # Calculate performance and outperformance
    strategy_performance = df[end, "cumulative_strategy"]
    strategy_outperformance = strategy_performance - df[end, "cumulative_return"]
    return strategy_performance, strategy_outperformance
end

function optimize_parameters(min_param1, max_param1, step_size1, min_param2, max_param2,step_size2)
    best_param1 = min_param1
    best_param2 = min_param2
    max_outperformance = -Inf

    # Call get_stock_prices once before the loop
    data = get_stock_prices("AAPL", "2010-01-01", "2020-12-31", "1d")

    for param1 in range(min_param1, stop=max_param1, step=step_size1)
        for param2 in range(min_param2, stop=max_param2, step=step_size2)
            strategy_performance, strategy_outperformance = backtest_strategy(data, param1, param2)

            # Update best parameters if outperformance is higher
            if strategy_outperformance > max_outperformance
                max_outperformance = strategy_outperformance
                println("New best outperformance: ", max_outperformance)
                best_param1 = param1
                best_param2 = param2
            end
        end
    end

    return (best_param1, best_param2)
end

optimize_parameters (generic function with 2 methods)

In [36]:
@time optimize_parameters(42, 56, 1, 252, 300, 1)

New best outperformance: -0.36734823032134667
New best outperformance: -0.3526043210215928
  0.518708 seconds (7.14 M allocations: 811.012 MiB, 9.19% gc time, 0.79% compilation time)


(42, 253)

In [46]:
using DataFrames, RollingFunctions

function calculate_sma(df::DataFrame, window::Int)
    return vcat(fill(missing, window - 1), rollmean(df[!, :close], window))
end

function backtest_sma_strategy(df::DataFrame, short_window::Int, long_window::Int)
    df = copy(df)
    df[!, :ShortSMA] = calculate_sma(df, short_window)
    df[!, :LongSMA] = calculate_sma(df, long_window)
    df = dropmissing(df, [:ShortSMA, :LongSMA])
    df[!, :Signal] = ifelse.(df[!,"ShortSMA"] .> df[!,"LongSMA"], 1, -1)
    df[!, :MarketReturn] = [missing; diff(df[!, :close]) ./ df[1:end-1, :close]]
    df[!, :StrategyReturn] = df[!, :MarketReturn] .* df[!, :Signal]
    df = dropmissing(df)
    return df
end

backtest_sma_strategy (generic function with 1 method)

In [61]:
function optimize_parameters(df::DataFrame, min_short::Int, max_short::Int, min_long::Int, max_long::Int)
    best_short = min_short
    best_long = min_long
    best_return = -Inf

    for short_window in min_short:max_short
        for long_window in min_long:max_long
            result = backtest_sma_strategy(df, short_window, long_window)
            result[!, :CumulativeMarketReturn] = cumsum(result[!, :MarketReturn])
            result[!, :CumulativeStrategyReturn] = cumsum(result[!, :StrategyReturn])
            # strategy_return = sum(result[!, :StrategyReturn])
            if result[end, :CumulativeStrategyReturn] > best_return
                best_return = result[end, :CumulativeStrategyReturn]
                best_short = short_window
                best_long = long_window
            end
        end
    end

    return best_short, best_long, best_return
end


df = get_stock_prices("AAPL", "2010-01-01", "2023-12-31", "1d")  # Replace with your function to get stock prices
best_short, best_long, best_return = optimize_parameters(df, 42, 56, 252, 300)
println("Best parameters: ", best_short, ", ", best_long)
println("Best return: ", best_return)

Best parameters: 44, 252
Best return: 2.598210645596513


In [40]:

data = get_stock_prices("AAPL", "2010-01-01", "2020-12-31", "1d")  # Replace with your function to get stock prices
best_params = optimize_parameters(data)
println("Best parameters: ", best_params)

LoadError: InexactError: Int64(NaN)