# Magic Formula Investing: Implementation and Simulation

## Implementation in Python

In [1]:
# importing packages
# import sys
# print sys.version
import math
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import sklearn
import statsmodels.api as sm

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# special matplotlib argument for improved plots
from matplotlib import rcParams

In [2]:
# Load the raw annual data into a DataFrame.
# NOTE(review): the preview printed below shows an 'Unnamed: 0' column, which suggests the
# CSV was saved together with its index; consider index_col=0 here -- confirm before changing.
df=pd.read_csv("Annual1990-2015.csv")

In [3]:
df.head()

Unnamed: 0,gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,tic,cusip,conm,...,cstk,dlc,dltt,ebit,ppent,pstk,wcap,costat,mkvalt,sic
0,1004,5/31/90,1989,INDL,C,D,STD,AIR,361105,AAR CORP,...,16.082,33.821,72.329,46.851,63.441,0,184.932,A,,5080
1,1004,5/31/91,1990,INDL,C,D,STD,AIR,361105,AAR CORP,...,16.097,16.5,68.953,33.701,63.415,0,189.172,A,,5080
2,1004,5/31/92,1991,INDL,C,D,STD,AIR,361105,AAR CORP,...,16.105,25.005,67.323,26.53,60.422,0,197.246,A,,5080
3,1004,5/31/93,1992,INDL,C,D,STD,AIR,361105,AAR CORP,...,16.205,25.025,66.298,16.343,56.052,0,193.399,A,,5080
4,1004,5/31/94,1993,INDL,C,D,STD,AIR,361105,AAR CORP,...,16.215,0.568,115.729,21.824,54.783,0,240.009,A,,5080


In [4]:
# Number of rows in the raw frame. The parenthesised form prints the same
# value under Python 2 and is also valid syntax under Python 3.
print(len(df))

316633


# Cleaning the Data

In [5]:
# Drop SIC Division H companies (Finance, Insurance, and Real Estate):
# keep only rows whose SIC code is below 6000 or at least 7000.
# The two kept slices are stacked low codes first, so the rows end up
# grouped by slice rather than in their original order.
df_below_6000 = df.loc[df['sic'].lt(6000)]
df_above_7000 = df.loc[df['sic'].ge(7000)]
df = pd.concat([df_below_6000, df_above_7000])

In [6]:
# Drop SIC Division E companies (Transportation, Communications, Electric,
# Gas, and Sanitary Services): keep only rows whose SIC code is below 4000
# or at least 5000.
# The two kept slices are stacked low codes first, so the rows end up
# grouped by slice rather than in their original order.
df_below_4000 = df.loc[df['sic'].lt(4000)]
df_above_5000 = df.loc[df['sic'].ge(5000)]
df = pd.concat([df_below_4000, df_above_5000])

In [7]:
df['ebit'].isnull().sum()

13344

In [8]:
def ratio_one(row):
    """Compute ratio 1 for a single row.

    ratio 1 = EBIT / (NFA + NWC), where
      * EBIT = earnings before interest and taxes, after subtracting
               depreciation and amortization   -> row['ebit']
      * NFA  = net fixed assets = net book value of property, plant and
               equipment                        -> row['ppent']
      * NWC  = net working capital = working capital - cash
                                                -> row['wcap'] - row['ch']

    Returns 0 if any of the four inputs is NaN or if the denominator
    (ppent + wcap - ch) is exactly zero; otherwise returns the quotient.
    """
    # The generator is lazy, so fields are read and checked left to right
    # and the check stops at the first NaN.
    if any(math.isnan(row[field]) for field in ('ebit', 'ppent', 'wcap', 'ch')):
        return 0

    capital = row['ppent'] + row['wcap'] - row['ch']
    if capital == 0:
        # avoid dividing by zero
        return 0
    return row['ebit'] / capital

In [9]:
# add the ratio1 column to the dataframe
df['ratio1']=df.apply(ratio_one,axis=1)

In [10]:
# Check the validity of the ratio1 values:
# first the number of rows whose ratio1 equals 0, then the number of rows
# whose ratio1 is null.
# The parenthesised print form gives the same output under Python 2 and is
# also valid syntax under Python 3.
print(len(df[df['ratio1']==0]))
print(df['ratio1'].isnull().sum())

19885
0


In [12]:
def ratio_two(row):
    """Compute ratio 2 for a single row.

    ratio 2 = EBIT / EV, where EV (enterprise value) is computed as
      * equity   = common stock capital + preferred stock capital
                   -> row['cstk'] + row['pstk']
      * net debt = long-term debt + current debt - cash
                   -> row['dltt'] + row['dlc'] - row['ch']
      * EV       = equity + net debt

    Returns 0 if EBIT is NaN, or if EV is exactly zero or NaN; otherwise
    returns the quotient.

    NOTE(review): equity is taken from the cstk/pstk capital columns rather
    than from a market-value column -- confirm this is the intended
    definition of EV.
    """
    earnings = row['ebit']
    if math.isnan(earnings):
        return 0

    enterprise_value = row['cstk'] + row['pstk'] + row['dltt'] + row['dlc'] - row['ch']
    # a NaN in any component makes the sum NaN, so one check covers them all
    if enterprise_value == 0 or math.isnan(enterprise_value):
        return 0
    return earnings / enterprise_value

In [13]:
# add the ratio2 column to the dataframe (ratio_two applied to each row)
df['ratio2']=df.apply(ratio_two,axis=1)

In [14]:
# Check the validity of the ratio2 values:
# first the number of rows whose ratio2 equals 0, then the number of rows
# whose ratio2 is null.
# The parenthesised print form gives the same output under Python 2 and is
# also valid syntax under Python 3.
print(len(df[df['ratio2']==0]))
print(df['ratio2'].isnull().sum())

19848
0


In [15]:
# Write the filtered frame, including the ratio1 and ratio2 columns, to disk.
# NOTE(review): to_csv writes the index as an extra unnamed column by default; pass
# index=False if that column is not wanted -- confirm with whatever reads this file.
df.to_csv("CleanedAnnual1990-2015.csv")

# Moving forward:
1. Rank companies based on ratio1 for a given year (from high = better to low = worse)
2. Rank companies based on ratio2 for a given year
3. Add the two rankings to get each company's overall status for the year
4. Year over year, buy high-ranking companies and sell low-ranking companies
## Pricing data from 1990 onward (on a monthly basis) is available in a CSV file
## Adjustment data from 1990 onward (on a quarterly basis) is available in a CSV file
1. Adjust the prices based on AJEX
2. Run simulations based on the adjusted prices