In [1]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from typing import TypeVar, Tuple

pds = TypeVar('pandas.core.series.Series')
smw = TypeVar('statsmodels.regression.linear_model.RegressionResultsWrapper')



def reg_model (X :pds, Y :pds)->Tuple[smw, str, float]:
    """A little helper function which calculates an ordinary least squares
    fit model, based on two variables.  Parameters: X values as pandas
    data series, Y values as pandas data series. It returne the full
    regression model, and a string.  The string can be used to display
    the regression euqtion, the r-square and the p-value
    """
    
    # calculate the regression model. The add_constant term is needed
    # to calculate the y-intercept
    model :smw  = sm.OLS(Y,sm.tools.add_constant(X)).fit()
    yp :pds = Y

    # extract the data we are interested in
    p  :float = model.pvalues[1] # the p-value
    r2 :float = model.rsquared   # the r-squared value
    s  :float = model.params[1]  # the slope
    y0 :float = model.params[0]  # the y-intercept

    # calculate the predicted Y-values
    yp :pds = y0 + s * X
        
    # create the display string
    ds : str  = (f"y = {y0:1.4f}+x*{s:1.4f}\n"
                 f"$r^2$ = {r2:1.2f}\n"
                 f"p = {p:1.4f}")
    
    return (model, ds, yp)



%matplotlib inline
plt.figure(figsize=(5, 4))
sns.set(style='darkgrid')

fn :str = "storks_vs_birthrate.csv" # file name
df = pd.read_csv(fn)
df.head()

X :pds = df["Birth rate (10^3 /yr)"]
Y :pds = df["Storks (pairs)"]

model, ds, yp = reg_model(X,Y)
print(ds)
ax = sns.regplot(X,Y)
ax.text(100,40000,ds,verticalalignment='top')
plt.tight_layout()
plt.savefig("ggg1.pdf")
plt.show()

In [5]:
from typing import TypeVar
import os  # no need to alias, since os is already short
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt 

pdf = TypeVar('pandas.core.frame.DataFrame')
pds = TypeVar('pandas.core.series.Series')

# lets get some data
fn :str = "storks_vs_birth_rate.csv" # file name

# this little piece of code could have saved me 20 minutes
if os.path.exists(fn): # check if the file is actually there
     df :pdf = pd.read_csv(fn) # read data
     print(df.head())
else:
     print("\n ------------------------------- \n")
     print(f"{fn} not found")
     print("\n ------------------------------- \n")
     exit()

SyntaxError: invalid syntax (<ipython-input-5-525dec16fd40>, line 17)

In [2]:
import statsmodels.api as sm
import statsmodels.formula.api as smf


In [3]:
f = "Storks (pairs) ~ Birth rate (10^3 /yr)"
mod = smf.ols(f,data=df)
res = mod.fit()
print(res.summary)

SyntaxError: invalid syntax (<unknown>, line 1)