In [None]:
import pandas as pd
import numpy as np

# Load the processed monthly stock file, relative to the notebook's directory.
# Forward slashes keep the path valid on both Windows and POSIX and avoid
# backslash escape sequences (such as '\p' and '\s') inside the string literal.
df = pd.read_csv('../processed_data/stock_price_monthly_2003_2024_processed.csv')
# Convert "date" to monthly periods so each row is keyed by its calendar month.
df["date"] = pd.to_datetime(df["date"]).dt.to_period('M')
df.head(10)

In [None]:
 # Sort data by NCUSIP (Stock ID) and date
# "date" must precede "TICKER" in the sort keys: the rolling windows and shifts
# below are row-based, so rows have to be in chronological order inside each
# NCUSIP group even when a stock's ticker changes over time. TICKER is kept
# only as a deterministic tie-breaker.
df = df.sort_values(by=["NCUSIP", "date", "TICKER"])

# Non-numeric RET entries are coerced to NaN instead of raising.
df["RET"] = pd.to_numeric(df["RET"], errors="coerce")

# Remove rows where RET = -66.0, -77.0, -88.0, -99.0
# (presumably missing-return sentinel codes from the data source -- confirm).
# Rows with NaN RET are kept. .copy() makes the filtered frame independent so
# the column assignments below do not raise SettingWithCopyWarning.
df = df[~df["RET"].isin([-66.0, -77.0, -88.0, -99.0])].copy()

# NOTE(review): every window and lag below counts rows, not calendar months.
# If a stock has dropped or absent months, a 12-row window spans more than
# twelve months -- confirm this is acceptable.
ret_by_stock = df.groupby("NCUSIP")["RET"]
rolling_12 = ret_by_stock.rolling(window=12, min_periods=12)

# Past 12-month return (momentum measure): compounded return over the last
# 12 rows, current row included. NaN until 12 non-NaN returns are available.
df["past_return"] = (
    rolling_12
    .apply(lambda window: np.prod(1 + window) - 1, raw=True)
    .reset_index(level=0, drop=True)
)

# Same 12-row window, but the most recent row is left out of the product,
# i.e. the compounded return of the 11 rows preceding the current one.
df["past_return_skip"] = (
    rolling_12
    .apply(lambda window: np.prod(1 + window[:-1]) - 1, raw=True)
    .reset_index(level=0, drop=True)
)

# Variance of RET over the last 12 rows.
df["past_variance"] = (
    rolling_12
    .var()
    .reset_index(level=0, drop=True)
)

# Rk is the rate of return of a firm lagged k years, for k = 0, 1, 2, 3, 4.
# R0 is RET itself; R1..R4 are RET taken 12, 24, 36 and 48 rows earlier
# within the same NCUSIP.
df["R1"] = ret_by_stock.shift(12)
df["R2"] = ret_by_stock.shift(24)
df["R3"] = ret_by_stock.shift(36)
df["R4"] = ret_by_stock.shift(48)


df

In [None]:
# Show every row of one stock (NCUSIP "U7260311") in date order.
(
    df.loc[df["NCUSIP"].eq("U7260311")]
    .sort_values(by="date")
)

In [None]:
# Sanity check: compound the 2016 monthly returns of NCUSIP "U7260311" by hand.
# NOTE(review): presumably meant to be compared with the "past_return" value on
# this stock's last 2016 row -- confirm.
_test = df[(df["NCUSIP"] == "U7260311") & (df["date"].dt.year == 2016)]

# Running product of gross returns (1 + RET); subtracting 1 at the end gives
# the cumulative net return over the selected rows.
mult = 1
for _t in _test['RET']:
    mult *= 1 + _t
mult - 1

In [None]:
# ME: absolute price times shares outstanding.
# NOTE(review): abs() suggests PRC can be stored with a negative sign -- confirm
# the convention against the data source.
df["ME"] = df["PRC"].abs() * df["SHROUT"]
# Turnover: traded volume relative to shares outstanding.
df["T-O"] = df["VOL"].div(df["SHROUT"])

In [None]:
# Drop columns that are no longer needed (absent columns are ignored, so the
# cell can be re-run safely) and order the rows by month, then by stock.
# The frame is rebound rather than mutated with inplace=True, which avoids
# modifying a frame that may be a slice of an earlier one.
df = (
    df.drop(columns=["VOL", "vwretd"], errors="ignore")
    .sort_values(by=["date", "NCUSIP"])
)
df

In [None]:
# Save the feature table to CSV without the index column.
# Forward slashes keep the relative path valid on Windows and POSIX alike; with
# backslashes, POSIX systems would treat the whole string as a single file name
# in the current directory.
df.to_csv('../models_data/stock_price_monthly_2003_2024.csv', index=False)