In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from scipy.stats import f as fisher_f
import matplotlib.pyplot as plt

In [2]:
df_atvi = pd.read_csv("atvi_weekly_return_detailed.csv")
df_spy = pd.read_csv("spy_weekly_return_detailed.csv")

In [3]:
df_atvi_yr1 = df_atvi[df_atvi["Year"] == 2021]
df_spy_yr1 = df_spy[df_spy["Year"] == 2021]

df_atvi_yr2 = df_atvi[df_atvi["Year"] == 2022]
df_spy_yr2 = df_spy[df_spy["Year"] == 2022]

In [4]:
def assign_months(df):
    dates = df["Date"].values
    df["Month"] = ""
    for i in range(dates.shape[0]):
        df.iat[i, -1] = dates[i][5:7]

In [5]:
assign_months(df_atvi_yr1)
assign_months(df_atvi_yr2)
assign_months(df_spy_yr1)
assign_months(df_spy_yr2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Month"] = ""


In [6]:
print("Total trading days in each Month")
df_atvi_yr1.groupby("Month")["Date"].count().reset_index()

Total trading days in each Month


Unnamed: 0,Month,Date
0,1,19
1,2,19
2,3,23
3,4,21
4,5,20
5,6,22
6,7,21
7,8,22
8,9,21
9,10,21


In [15]:
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
def get_sig_diffs(df, months):
    sig_dif = 0
    for i in months:
        data_in_month_i = df[df["Month"] == i]
        days = np.arange(1, data_in_month_i.shape[0] + 1)
        closings = data_in_month_i["Adj Close"].values
        x = days.reshape(-1, 1)
        y = closings.reshape(-1, 1)
        lm_i = LinearRegression().fit(x, y)
        predictions = lm_i.predict(x)
        mse = mean_squared_error(y, predictions)
        sse_total = mse*days.shape[0]
        minimum = sse_total
        k1,k2 = 0,0
        for j in range(1, days.shape[0] - 2):
            x1 = days[:days[j]].reshape(-1, 1)
            x2 = days[days[j]:].reshape(-1, 1)
            y1 = closings[:days[j]].reshape(-1, 1)
            y2 = closings[days[j]:].reshape(-1, 1)
            lm1 = LinearRegression().fit(x1, y1)
            pred1 = lm1.predict(x1)
            mse1 = mean_squared_error(y1, pred1)
            sse1 = mse1*days[j]
            lm2 = LinearRegression().fit(x2, y2)
            pred2 = lm2.predict(x2)
            mse2 = mean_squared_error(y2, pred2)
            sse2 = mse2*(days.shape[0] - days[j])
            if sse1 + sse2 <= minimum:
                minimum = sse1 + sse2
                k1 = days[j]
                k2 = days.shape[0] - days[j]
        numerator = (sse_total - minimum)/2
        denominator = minimum/(days.shape[0] - 4)
        f_val = numerator/denominator
        # Getting P-Value for One Tailed F-Test
        p_val = 1 - fisher_f.cdf(f_val, 2, days.shape[0] - 4)
        if p_val < 0.1:
            print(f"For Month {i} -> Candidate Days = ({k1},{k2}). The difference is significant")
            sig_dif += 1
        else:
            print(f"For Month {i} -> Candidate Days = ({k1},{k2}). The difference is not significant")
        
        # Plotting graphs to check linear regression lines
        # x1 = days[:k1].reshape(-1, 1)
        # x2 = days[k1:].reshape(-1, 1)
        # y1 = closings[:k1].reshape(-1, 1)
        # y2 = closings[k1:].reshape(-1, 1)
        # lm1 = LinearRegression().fit(x1, y1)
        # pred1 = lm1.predict(x1)
        # lm2 = LinearRegression().fit(x2, y2)
        # pred2 = lm2.predict(x2)
        # plt.figure(figsize=(10, 10))
        # plt.scatter(x1, y1)
        # plt.scatter(x2, y2)
        # plt.plot(x1, pred1)
        # plt.plot(x2, pred2)
        # plt.show()
    print(f"Total Significant Differences -> {sig_dif}")

In [16]:
print("For Activision Blizzard Year 1 ->")
get_sig_diffs(df_atvi_yr1, months)
print("\nFor Activision Blizzard Year 2 ->")
get_sig_diffs(df_atvi_yr2, months)

For Activision Blizzard Year 1 ->
For Month 01 -> Candidate Days = (11,8). The difference is significant
For Month 02 -> Candidate Days = (8,11). The difference is significant
For Month 03 -> Candidate Days = (6,17). The difference is significant
For Month 04 -> Candidate Days = (12,9). The difference is significant
For Month 05 -> Candidate Days = (7,13). The difference is significant
For Month 06 -> Candidate Days = (11,11). The difference is significant
For Month 07 -> Candidate Days = (17,4). The difference is significant
For Month 08 -> Candidate Days = (7,15). The difference is significant
For Month 09 -> Candidate Days = (13,8). The difference is significant
For Month 10 -> Candidate Days = (13,8). The difference is significant
For Month 11 -> Candidate Days = (2,19). The difference is significant
For Month 12 -> Candidate Days = (7,15). The difference is significant
Total Significant Differences -> 12

For Activision Blizzard Year 2 ->
For Month 01 -> Candidate Days = (10,10). 

In [17]:
print("For S&P 500 Year 1 ->")
get_sig_diffs(df_spy_yr1, months)
print("\nFor S&P 500 Year 2 ->")
get_sig_diffs(df_spy_yr2, months)

For S&P 500 Year 1 ->
For Month 01 -> Candidate Days = (16,3). The difference is significant
For Month 02 -> Candidate Days = (9,10). The difference is significant
For Month 03 -> Candidate Days = (6,17). The difference is significant
For Month 04 -> Candidate Days = (11,10). The difference is significant
For Month 05 -> Candidate Days = (7,13). The difference is significant
For Month 06 -> Candidate Days = (13,9). The difference is significant
For Month 07 -> Candidate Days = (10,11). The difference is significant
For Month 08 -> Candidate Days = (12,10). The difference is significant
For Month 09 -> Candidate Days = (15,6). The difference is significant
For Month 10 -> Candidate Days = (9,12). The difference is significant
For Month 11 -> Candidate Days = (18,3). The difference is significant
For Month 12 -> Candidate Days = (12,10). The difference is significant
Total Significant Differences -> 12

For S&P 500 Year 2 ->
For Month 01 -> Candidate Days = (13,7). The difference is sign