# PDQ function
- Finds the best (p, d, q) combination for an ARIMA model, i.e. the one with the lowest AIC score

In [None]:
def arma_fitting(data, convergenceWarning=True, summary=False, n=5):
    '''
    Grid-search ARIMA orders and report the (p, d, q) with the lowest AIC score.

    Every combination of p, d and q in range(n) is fitted. A combination whose
    fit records a warning, or raises, is skipped and left out of the ranking.

    data : series passed straight to statsmodels ARIMA
    convergenceWarning : True = ignore convergence warnings, False = don't ignore
                         (a convergence warning then causes the model to be skipped)
    summary : True = print the outcome of each pdq check
    n : exclusive upper bound for p, d and q, default is 5

    Returns the list of (aic, p, d, q) tuples sorted by ascending AIC; the list
    is empty when no combination could be fitted cleanly.
    '''
    import itertools
    import warnings
    from statsmodels.tools.sm_exceptions import ConvergenceWarning
    from statsmodels.tsa.arima.model import ARIMA

    results = []
    # itertools.product walks the combinations in the same order as three nested loops
    for p, d, q in itertools.product(range(n), repeat=3):
        # try, except to catch warnings and errors
        try:
            # Record warnings raised during the fit so they can be inspected below
            with warnings.catch_warnings(record=True) as warn:
                if convergenceWarning:
                    warnings.simplefilter("ignore", ConvergenceWarning)
                model = ARIMA(data, order=(p, d, q)).fit()
        except Exception as e:
            print(f"\033[1m\033[91m{e}\nFailed to fit ARIMA model with p={p}, d={d}, q={q}. Skipping\033[0m")
            print(f"{'-'*100}\n")
            continue

        # If there is a warning, the model is not kept
        if warn:
            if summary:
                print(f"\n\033[1m\033[93m{warn[0].message}\nWarning while fitting ARIMA model with p={p}, d={d}, q={q}. Skipping\033[0m")
                print(f"{'-'*100}\n")
            continue

        # No warning, keep
        aic_score = model.aic
        results.append((aic_score, p, d, q))
        if summary:
            print("P of: ", p)
            print("d of: ", d)
            print("q of: ", q)
            print("AIC score: ", aic_score)
            print(f"{'-'*100}\n")

    # Guard against every combination having been skipped
    if not results:
        print("No ARIMA model could be fitted without warnings or errors.")
        return results

    results.sort()
    print("Best p, d, q for lowest AIC score: ")
    print("P of: ", results[0][1])
    print("d of: ", results[0][2])
    print("q of: ", results[0][3])
    return results

In [None]:
import itertools
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from joblib import Parallel, delayed
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

def sarimax_fitting(data, S, n, convergenceWarning=True, summary=False, n_jobs=-1):
    '''
    Parallel grid search for the SARIMAX order with the lowest AIC score.

    Every combination of p, d, q, P, D, Q in range(n) is fitted with the
    seasonal period S. A combination whose fit records a warning, or raises,
    is skipped and left out of the ranking.

    data : series passed straight to statsmodels SARIMAX
    S : seasonal period used in seasonal_order
    n : exclusive upper bound for each of p, d, q, P, D, Q
    convergenceWarning : True = ignore convergence warnings, False = don't ignore
                         (a convergence warning then causes the model to be skipped)
    summary : True = print the outcome of each combination
    n_jobs : forwarded to joblib.Parallel

    Returns the list of (aic, (p, d, q), (P, D, Q, S)) tuples sorted by
    ascending AIC; the list is empty when nothing could be fitted cleanly.
    '''

    def fit_model(p, d, q, P, D, Q, S, summary, convergenceWarning):
        # Returns the scored tuple for a clean fit, None for a skipped one.
        # try, except to catch warnings and errors
        try:
            # Record warnings raised during the fit; the filters set here are
            # restored when the context exits, so nothing leaks globally.
            with warnings.catch_warnings(record=True) as warn:
                warnings.filterwarnings("ignore", message="No frequency information was provided, so inferred frequency MS will be used.")
                if convergenceWarning:
                    warnings.simplefilter("ignore", ConvergenceWarning)
                model = SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S),
                                enforce_stationarity=False, enforce_invertibility=False).fit()

            # If there is a warning, the model is not kept
            if warn:
                if summary:
                    print(f"\n\033[1m\033[93m{warn[0].message}\nWarning while fitting SARIMAX model with p={p}, "
                          f"d={d}, q={q}, P={P}, D={D}, Q={Q}, s={S}. Skipping\033[0m")
                    print(f"{'-'*100}\n")
                return None

            # No warning, keep
            aic_score = model.aic
            if summary:
                print(f"({p},{d},{q})\n({P},{D},{Q},{S})")
                print("AIC score: ", aic_score)
                print(f"{'-'*100}\n")
            return (aic_score, (p, d, q), (P, D, Q, S))

        except Exception as e:
            print(f"\033[1m\033[91m{e}\nFailed to fit SARIMAX model with p={p}, "
                  f"d={d}, q={q}, P={P}, D={D}, Q={Q}, s={S}. Skipping\033[0m")
            print(f"{'-'*100}\n")
            return None

    # running in parallel; plain range() keeps the orders as Python ints
    fitted = Parallel(n_jobs=n_jobs, verbose=1)(
        delayed(fit_model)(p, d, q, P, D, Q, S, summary, convergenceWarning)
        for p, d, q, P, D, Q in itertools.product(range(n), repeat=6))

    # Skipped combinations come back as None and cannot be sorted with tuples
    results = sorted(r for r in fitted if r is not None)

    if not results:
        print("No SARIMAX model could be fitted without warnings or errors.")
        return results

    print(f"Best p, d, q and P, D, Q, s for lowest AIC score: {results[0][0]}")
    print(f"pdq= {results[0][1]}")
    print(f"PDQs= {results[0][2]}")
    return results

In [None]:
def stationary(*args):
    '''
    Run the adfuller test on each series and report whether its p-value is below .05.

    args : one or more series, each passed to adfuller

    For each series prints True and the p-value when it is below .05,
    otherwise prints False. Finally prints the smallest p-value among the
    series that passed, if any did.
    '''
    pv = []
    for data in args:
        # Run the test once per series; the original re-ran it for every use
        p_value = adfuller(data)[1]
        if p_value < .05:
            print(f'{True} With a pvalue of {p_value}\n')
            pv.append(p_value)
        else:
            print(f'{False}\n')
    if pv:
        print(min(pv))

# Time Series Performance

In [None]:
def ts_performance(data, pdq):
    '''
    Fit an ARIMA model on the given data and show its fit report and diagnostics.

    data : series passed to ARIMA
    pdq : (p,d,q)

    Prints the fitted model's mle_retvals and summary, then displays the
    diagnostics figure.
    '''
    # Fit on the argument, not on the notebook-level data_diff the original read
    arma = ARIMA(data, order=pdq).fit()
    print(arma.mle_retvals)
    print(arma.summary())
    display(arma.plot_diagnostics(figsize=(16, 8)))


# Plotting each individual row

In [None]:
def microplot(data, X, y_sort, limit=20, **kwargs):
    '''
    Bar plot with one bar per row, showing the rows with the largest y_sort values.

    data : dataframe
    X : column used for the x axis
    y_sort : column used for the y axis; rows are sorted by it, largest first
    limit : number of rows kept after sorting, default is 20
    kwargs : forwarded to sns.barplot
    '''
    label_size = 15

    # Largest values first, then keep only the leading rows
    top_rows = data.sort_values(by=y_sort, ascending=False)[:limit]
    # Fix the bar order to the sorted order
    bar_order = top_rows[X].values.tolist()

    plt.subplots(figsize=(18, 12))
    axes = sns.barplot(
        x=X,
        y=y_sort,
        data=top_rows,
        palette='YlOrRd_r',
        order=bar_order,
        **kwargs
    )
    axes.set_title(f'{y_sort} vs. {X}', fontsize=label_size)
    axes.set_xlabel(data[X].name, fontsize=label_size)
    axes.set_ylabel(y_sort, fontsize=label_size)

# Plotting each grouped data
- Example: several houses are in the same state, so group by state and calculate the mean

In [None]:
def macroplot(data, group, y, limit=None, **kwargs):
    '''
    Bar plot of the mean of y for each group, largest mean first.

    data : dataframe
    group : groupby column
    y : y column or target, averaged within each group
    limit : number of groups kept after sorting, default None keeps all of them
    kwargs : forwarded to sns.barplot
    '''
    label_size = 15

    # Mean of the target per group, largest first, optionally truncated
    group_means = data.groupby(by=group)[y].mean()
    top_means = group_means.sort_values(ascending=False)[:limit]
    bar_order = top_means.index.tolist()

    plt.subplots(figsize=(18, 12))
    axes = sns.barplot(
        x=top_means.index,
        y=top_means.values,
        palette='YlOrRd_r',
        order=bar_order,
        **kwargs
    )
    axes.set_title(f'{y} vs. {group}', fontsize=label_size)
    axes.set_xlabel(group, fontsize=label_size)
    axes.set_ylabel(y, fontsize=label_size)

# Old Functions
1. The original sarimax function - no parallel processing, takes much longer, summary prints do work though

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
import time
def pdqs_fitting(data, S, n, convergenceWarning=True, summary=False):
    '''
    Sequential grid search for the SARIMAX order with the lowest AIC score.

    Every combination of p, d, q, P, D, Q in range(n) is fitted with the
    seasonal period S. A combination whose fit records a warning, or raises,
    is skipped and left out of the ranking.

    data : series passed straight to statsmodels SARIMAX
    S : seasonal period used in seasonal_order
    n : exclusive upper bound for each of p, d, q, P, D, Q
    convergenceWarning : True = ignore convergence warnings, False = don't ignore
                         (a convergence warning then causes the model to be skipped)
    summary : True = print the outcome of each combination

    Prints the best combination and the number of combinations tried, and
    returns the list of (aic, (p, d, q), (P, D, Q, S)) tuples sorted by
    ascending AIC (empty when nothing could be fitted cleanly).
    '''
    import itertools
    import warnings
    from statsmodels.tools.sm_exceptions import ConvergenceWarning

    results = []
    key = 0  # number of combinations tried
    # itertools.product walks the combinations in the same order as six nested loops
    for p, d, q, P, D, Q in itertools.product(range(n), repeat=6):
        key += 1
        # try, except to catch warnings and errors
        try:
            # Record warnings raised during the fit; the filters set here are
            # restored when the context exits, so nothing leaks globally.
            with warnings.catch_warnings(record=True) as warn:
                warnings.filterwarnings("ignore", message="No frequency information was provided, so inferred frequency MS will be used.")
                if convergenceWarning:
                    warnings.simplefilter("ignore", ConvergenceWarning)
                model = SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S),
                                enforce_stationarity=False,
                                enforce_invertibility=False).fit()
        except Exception as e:
            print(f"\033[1m\033[91m{e}\nFailed to fit ARIMA model with p={p}, d={d}, q={q}, P={P}, D={D}, Q={Q}, s={S}. Skipping\033[0m")
            print(f"{'-'*100}\n")
            continue

        # If there is a warning, the model is not kept
        if warn:
            if summary:
                print(f"\n\033[1m\033[93m{warn[0].message}\nWarning while fitting ARIMA model with p={p}, d={d}, q={q}, P={P}, D={D}, Q={Q}, s={S}. Skipping\033[0m")
                print(f"{'-'*100}\n")
            continue

        # No warning, keep
        aic_score = model.aic
        results.append((aic_score, (p, d, q), (P, D, Q, S)))
        if summary:
            print(f"({p},{d},{q})\n({P},{D},{Q},{S})")
            print("AIC score: ", aic_score)
            print(f"{'-'*100}\n")

    results.sort()
    if results:
        print(f"Best p, d, q and P, D, Q, s for lowest AIC score: {results[0][0]}")
        print(f"pdq= {results[0][1]}")
        print(f"PDQs= {results[0][2]}")
    else:
        # Guard against every combination having been skipped
        print("No SARIMAX model could be fitted without warnings or errors.")
    print(key)
    return results