In [33]:
import pandas as pd
import numpy as np
import seaborn as sns
from pandasql import sqldf
from sklearn import linear_model
import statsmodels.api as sm
from scipy import stats

In [34]:
test = pd.read_sas('3v3.sas7bdat', encoding='latin-1')
test

Unnamed: 0,GVKEY,DATADATE,FYEAR,LPERMNO,SIC,AJEX,ACT,AT,CEQ,CHE,...,MIB,PSTK,XSGA,CSHO,CONSOL,INDFMT,DATAFMT,POPSRC,CURCD,COSTAT
0,001001,1983-12-31,1983.0,10015.0,5812,1.000000,4.807,14.080,7.823,4.280,...,0.0,0.0,16.435,3.568,C,INDL,STD,D,USD,I
1,001001,1984-12-31,1984.0,10015.0,5812,1.000000,2.789,16.267,8.962,1.986,...,0.0,0.0,20.628,3.568,C,INDL,STD,D,USD,I
2,001001,1985-12-31,1985.0,10015.0,5812,1.000000,3.852,39.495,13.014,2.787,...,0.0,0.0,33.021,3.988,C,INDL,STD,D,USD,I
3,001003,1983-12-31,1983.0,10031.0,5712,1.000000,8.273,8.529,6.095,2.023,...,0.0,0.0,3.186,2.683,C,INDL,STD,D,USD,I
4,001003,1984-12-31,1984.0,10031.0,5712,1.000000,7.817,8.241,6.482,0.844,...,0.0,0.0,4.014,2.683,C,INDL,STD,D,USD,I
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285868,345920,2021-12-31,2021.0,20194.0,3524,1.000000,269.384,891.242,635.180,28.384,...,0.0,0.0,67.945,44.618,C,INDL,STD,D,USD,A
285869,345980,2020-12-31,2020.0,20333.0,5961,0.033333,2314.000,2397.000,1027.000,2129.000,...,0.0,0.0,2225.000,587.000,C,INDL,STD,D,USD,A
285870,345980,2021-12-31,2021.0,20333.0,5961,0.033333,1224.000,1283.000,818.000,1168.000,...,0.0,0.0,1469.000,658.000,C,INDL,STD,D,USD,A
285871,347007,2021-12-31,2021.0,15533.0,2836,1.000000,334.347,468.910,-242.173,317.116,...,0.0,0.0,,397.830,C,INDL,STD,D,USD,A


### Noa and **dNoa**, (changes in) net operating assets
We measure net operating assets as operating assets minus operating liabilities. Operating assets are total assets (Compustat annual item AT) minus cash and short-term investment (item CHE). Operating liabilities are total assets minus debt included in current liabilities (item DLC, zero if missing), minus long-term debt (item DLTT, zero if missing), minus minority interests (item MIB, zero if missing), minus preferred stocks (item PSTK, zero if missing), and minus common equity (item CEQ). Noa is net operating assets scalded by 1-year-lagged total assets. Changes in net operating assets, dNoa, is the annual change in net operating assets scaled by 1-year-lagged total assets. At the end of June of each year t, we sort stocks into deciles based on Noa, and separately, on dNOA, for the fiscal year ending in calendar year t −1. Monthly decile returns are computed from July of year t to June of t +1, and the deciles are rebalanced in June of t +1

In [35]:
def fill_zero(var):
    temp = test[var].isna().sum()
    test[var].fillna(0, inplace=True)
    print("NAN values for " + str(var) + ": " + str(temp))
    temp = test[var].isna().sum()
    print("NAN values for " + str(var) + " filled: " + str(temp))

In [36]:
fill_zero("DLC")
fill_zero("DLTT")
fill_zero("MIB")
fill_zero("PSTK")

NAN values for DLC: 10465
NAN values for DLC filled: 0
NAN values for DLTT: 8844
NAN values for DLTT filled: 0
NAN values for MIB: 23237
NAN values for MIB filled: 0
NAN values for PSTK: 12034
NAN values for PSTK filled: 0


In [37]:
operating_assets = test["AT"] - test["CHE"]
operating_liabilities = test["AT"] - test["DLC"] - test["DLTT"] - test["MIB"] - test["PSTK"] - test["CEQ"]

Noa = operating_assets - operating_liabilities

lagged_total_assets = test["AT"].shift(1)  # 1-year-lagged total assets
dNoa = (Noa - Noa.shift(1)) / lagged_total_assets

test["dNoa"] = dNoa
test["dNoa"].fillna(0, inplace=True)

test["dNoa"]

0         0.000000
1         0.237713
2         1.150612
3        -0.638030
4         0.154297
            ...   
285868    2.249312
285869   -2.046996
285870    0.302461
285871    0.320635
285872   -0.190736
Name: dNoa, Length: 285873, dtype: float64

In [38]:
deciles_dNoa = pd.qcut(test["dNoa"], 10, labels=False, duplicates="drop")
nanvalues = deciles_dNoa.isna().sum()
allvalues = deciles_dNoa.count()

difference = nanvalues/allvalues
difference

  diff_b_a = subtract(b, a)


0.25000765204615716

In [39]:
# monthly_returns = pd.DataFrame()  # Assuming you have monthly returns data
# monthly_returns["Date"] = pd.date_range(start="2023-07-01", periods=12, freq="M")
# monthly_returns["Decile_dNoa"] = deciles_dNoa.values
# monthly_returns["Monthly_Return"] = 0.05

In [40]:
# test["dNoa"] = (test["AT"] - test["CHE"]) - (test["AT"] - test["DLC"] - test["DLTT"] - test["MIB"] - test["PSTK"] - test["CEQ"])
# test["dNoa"]

In [41]:
# test['dNoa_decile'] = pd.qcut(test['dNoa'], 10, labels=False)
# test['dNoa_decile']

### **Nsi**, net stock issues
At the end of June of year t, we measure net stock issues, Nsi, as the natural log of the ratio of the split-adjusted shares outstanding at the fiscal year ending in calendar year t −1 to the split-adjusted shares outstanding at the fiscal year ending in t −2. The split-adjusted shares outstanding is shares outstanding (Compustat annual item CSHO) times the adjustment factor (item AJEX). At the end of June of each year t, we sort stocks with negative Nsi into two portfolios (1 and 2), stocks with zero Nsi into 1 portfolio (3), and stocks with positive Nsi into seven portfolios (4 to 10). Monthly decile returns are from July of year t to June of t +1, and the deciles are rebalanced in June of t +1.

In [42]:
test["CSHO_t_minus_1_adjusted"] = test["CSHO"] * test["AJEX"]

test_shifted = test.shift(periods=1)
test["CSHO_t_minus_2_adjusted"] = test_shifted["CSHO"] * test_shifted["AJEX"]

test["Nsi"] = np.log(test["CSHO_t_minus_1_adjusted"] / test["CSHO_t_minus_2_adjusted"])
test["Nsi"]

  result = getattr(ufunc, method)(*inputs, **kwargs)


0              NaN
1         0.000000
2         0.111285
3        -0.396354
4         0.000000
            ...   
285868    0.286592
285869   -0.824310
285870    0.114180
285871    2.898017
285872   -3.874721
Name: Nsi, Length: 285873, dtype: float64

In [49]:
test["Nsi_f"] = np.where(test["Nsi"] < 0, np.where(test["Nsi"] < test["Nsi"].quantile(0.5), 1, 2),
                             np.where(test["Nsi"] == 0, 3,
                                      np.where(test["Nsi"] <= test["Nsi"].quantile(0.1), 4,
                                               np.where(test["Nsi"] <= test["Nsi"].quantile(0.2), 5,
                                                        np.where(test["Nsi"] <= test["Nsi"].quantile(0.3), 6,
                                                                 np.where(test["Nsi"] <= test["Nsi"].quantile(0.4), 7,
                                                                          np.where(test["Nsi"] <= test["Nsi"].quantile(0.5), 8,
                                                                                   np.where(test["Nsi"] <= test["Nsi"].quantile(0.6), 9, 10))))))))
test["Nsi_f"]

0         10
1          3
2         10
3          1
4          3
          ..
285868    10
285869     1
285870    10
285871    10
285872     1
Name: Nsi_f, Length: 285873, dtype: int32

### dNco, **dNca**, and dNcl, changes in net noncurrent operating assets, in noncurrent operating assets, and in noncurrent operating liabilities.
dNco is the change in net noncurrent operating assets. Net noncurrent operating assets are noncurrent operating assets (Nca) minus noncurrent operating liabilities (Ncl), with Nca = total assets (Compustat annual item AT) − current assets (item ACT) − long-term investments (item IVAO), and Ncl = total liabilities (item LT) − current liabilities (item LCT) − long-term debt (item DLTT). dNca is the change in noncurrent operating assets, and dNcl is the change in noncurrent operating liabilities. Missing changes in long-term investments and long-term debt are set to zero. At the end of June of each year t, we sort stocks into deciles based, on dNco, dNca, and dNcl for the fiscal year ending in calendar year t −1, all scaled by total assets for the fiscal year ending in calendar year t −2. Monthly decile returns are from July of year t to June of t +1, and the deciles are rebalanced in June of t +1.


In [43]:
fill_zero("IVAO")

NAN values for IVAO: 50623
NAN values for IVAO filled: 0


In [44]:
test["Nca"] = test["AT"] - test["ACT"] - test["IVAO"]
test["Nca"]

0           9.182
1          13.400
2          35.616
3           0.256
4           0.424
           ...   
285868    621.858
285869     79.000
285870     42.000
285871    134.563
285872      0.190
Name: Nca, Length: 285873, dtype: float64

In [50]:
#Need to calculate dNca
test["dNca"] = test["Nca"].diff()
test["dNca"].fillna(0, inplace=True)
test["dNca"]

0           0.000
1           4.218
2          22.216
3         -35.360
4           0.168
           ...   
285868    545.980
285869   -542.858
285870    -37.000
285871     92.563
285872   -134.373
Name: dNca, Length: 285873, dtype: float64

### dFin, dSti, dLti, **dFnl**, and dBe, changes in net financial assets, in short-term investments, in long-term investments, in financial liabilities, and in book equity. 
dFin is the change in net financial assets. Net financial assets are financial assets (Fna) minus financial liabilities (Fnl), with Fna = short-term investments (Compustat annual item IVST) + long-term investments (item IVAO), and Fnl = long-term debt (item DLTT) + debt in current liabilities (item DLC) + preferred stock (item PSTK). dSti is the change in short-term investments, dLti is the change in long-term investments, and dFnl is the change in financial liabilities. dBe is the change in book equity (item CEQ). Missing changes in debt in current liabilities, long-term investments, long-term debt, short-term investments, and preferred stocks are set to zero (at least 1 change must be non-missing). When constructing dSti (dLti), we exclude firms that do not have short-term (long-term) investments in the past two fiscal years. At the end of June of each year t, we sort stocks into deciles based, separately, on dFin, dSti, dLti, dFnl, and dBe for the fiscal year ending in calendar year t −1, all scaled by total assets (item AT) for the fiscal year ending in calendar year t −2. Monthly decile returns are calculated from July of year t to June of t +1, and the deciles are rebalanced in June of t +1. For sufficient data coverage, the dSti portfolios start in July 1971.

In [46]:
fill_zero("DLTT")
fill_zero("DLC")
fill_zero("PSTK")

NAN values for DLTT: 0
NAN values for DLTT filled: 0
NAN values for DLC: 0
NAN values for DLC filled: 0
NAN values for PSTK: 0
NAN values for PSTK filled: 0


In [47]:
test["Fnl"] = test["DLTT"] + test["DLC"] + test["PSTK"]
test["Fnl"]

0           4.864
1           4.778
2          20.244
3           1.200
4           0.950
           ...   
285868    167.573
285869     52.000
285870     25.000
285871    645.664
285872      0.169
Name: Fnl, Length: 285873, dtype: float64

In [52]:
test["dFnl"] = test["Fnl"].diff()
test["dFnl"].fillna(0, inplace=True)
test["dFnl"]

0           0.000
1          -0.086
2          15.466
3         -19.044
4          -0.250
           ...   
285868    147.516
285869   -115.573
285870    -27.000
285871    620.664
285872   -645.495
Name: dFnl, Length: 285873, dtype: float64

In [48]:
# dlc = test['DLC'].isna().sum()
# test['DLC'].fillna(0, inplace=True)
# print("NAN values: " + str(dlc))
# dlc = test['DLC'].isna().sum()
# print("NAN values: " + str(dlc))