# Examples of using MTSDataModel

## Load modules

In [1]:
import sys
sys.path.insert(0,r'../src/')
import MTSDataModel as mts

In [2]:
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
import rpy2

## Get data

In [3]:
# Column-name mapping handed to MTSDataModel together with the CSV path.
colnames = {
    'level1': 'Variable',
    'level2': 'Country',
    'value': 'Value',
    'index': 'Date',
}
filename = 'sample_data.csv'

do = mts.MTSDataModel(filepath=filename, colnames=colnames)

In [4]:
# Fetch the model's current DataFrame and preview its first five rows.
df = do.ReturnDf()
df.head(5)

Unnamed: 0_level_0,GDP,GDP,Inflation,Inflation,Inflation,StockPrices,StockPrices,StockPrices
Unnamed: 0_level_1,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN
1970-01-01,,63557250000.0,25.154467,31.6201,12.784633,11.314692,10.873145,1.247428
1970-04-01,,65683280000.0,25.412633,31.900933,12.861467,11.390358,9.807037,1.306829
1970-07-01,,67061840000.0,25.7815,31.9478,12.9462,12.117531,9.271091,1.34841
1970-10-01,,67900940000.0,26.002767,32.252033,13.017667,12.248969,8.835394,1.401871
1971-01-01,,64375050000.0,26.260967,33.024367,13.290433,12.077389,9.699077,1.490974


## Pre-processing

### Deflate

In [5]:
# Deflate StockPrices with the Inflation variable; the result shows up as
# the StockPrices_def columns in later outputs.
do.DeflateVariables(
    ['StockPrices'],
    infvar='Inflation',
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  frame['X_hat'] = self.Deflate(frame)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


### De-trend

In [6]:
# De-trend the deflated stock prices and GDP; the resulting columns carry
# the `_ld1` suffix (StockPrices_def_ld1, GDP_ld1) in later outputs.
do.DetrendVariables(
    ['StockPrices_def', 'GDP'],
    difftype='ld',
)

## Feature engineering

### Wavelet MRA decompositions

In [None]:
# Wavelet MRA decomposition of GDP_ld1 for FIN only, with 6 levels.
do.MRADecomposition(
    variables=['GDP_ld1'],
    entities=['FIN'],
    levels=6,
    expanding='none',
)
# Known issue (original author's note: "bug here"): the same call without
# an explicit `entities` argument is kept disabled.
# do.MRADecomposition(variables=['GDP_ld1'], levels=6, expanding='none')

### Dimension reduction

In [None]:
# Reduce the two de-trended variables for FIN and DEU; `suffix='PC'` is the
# label passed for the result.
do.ReduceVariableDimension(
    suffix='PC',
    variables=['StockPrices_def_ld1', 'GDP_ld1'],
    entities=['FIN', 'DEU'],
)

### Sum variables

In [None]:
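# NOTE(review): disabled sketch of a dict-based `do.SumVariables` call; a
# standalone `SumVariables` function is prototyped in the next cell instead.
#variables = {"gdp+stock": ['GDP_ld1','StockPrices_def_ld1']}
#do.SumVariables(variables)

#df = do.ReturnDf()
#df.head()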

In [37]:
class MyException(Exception):
    """Raised when a requested variable is missing for a requested entity."""


def _resolve_frame(frame):
    """Return `frame`, or the notebook-level `df` when `frame` is None.

    The fallback keeps the original one-/two-argument helper calls working;
    new code should always pass the frame explicitly.
    """
    if frame is None:
        return df  # legacy behaviour: read the notebook-global DataFrame
    return frame


def EntitiesDefault(variables, frame=None):
    """
    Return the entities for which all chosen variables exist.

    Parameters
    ----------
    variables : iterable
        Level-0 column labels that must all be present for an entity.
    frame : pandas.DataFrame, optional
        Frame with two-level columns (level 0: variable, level 1: entity).
        Defaults to the notebook-level `df` for backward compatibility.

    Returns
    -------
    list
        Entity labels, in the sorted order produced by `np.unique`.
    """
    frame = _resolve_frame(frame)
    wanted = set(variables)
    level_vars = frame.columns.get_level_values(0)
    level_ents = frame.columns.get_level_values(1)

    entities = []
    # Loop over every entity that occurs in the column index
    for crtentity in np.unique(level_ents.values):
        # Keep the entity only if every wanted variable has a column under it
        if wanted.issubset(level_vars[level_ents == crtentity]):
            entities.append(crtentity)
    return entities


def VariablesCheck(variables, entities, frame=None):
    """
    Check that every chosen variable exists for every chosen entity.

    Parameters
    ----------
    variables : iterable
        Level-0 column labels that must be present.
    entities : iterable
        Level-1 column labels to check.
    frame : pandas.DataFrame, optional
        Frame with two-level columns. Defaults to the notebook-level `df`
        for backward compatibility.

    Raises
    ------
    MyException
        For the first entity that lacks at least one of the variables.
    """
    frame = _resolve_frame(frame)
    wanted = set(variables)
    level_vars = frame.columns.get_level_values(0)
    level_ents = frame.columns.get_level_values(1)

    for entity in entities:
        if not wanted.issubset(level_vars[level_ents == entity]):
            # format() also copes with non-string entity labels
            raise MyException("Not all variables present for {}.".format(entity))


def SumVariables(df, variables, name, entities=None):
    """
    Aggregate given variables, under the same entity, using a simple sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with two-level columns (level 0: variable, level 1: entity).
        It is not modified.
    variables : list
        Level-0 column labels to be summed together.
    name : str
        Level-0 label of the new column holding the sum.
    entities : list, optional
        Entities to compute the sum for. When None, every entity for which
        all `variables` exist in `df` is used.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with one extra `(name, entity)` column per entity. A row
        is NaN whenever any of the summed values is NaN (`skipna=False`).

    Raises
    ------
    MyException
        If `entities` is given and some variable is missing for one of them.
    """
    # Validate against the frame that was passed in, not a notebook global
    if entities is None:
        entities = EntitiesDefault(variables, df)
    else:
        VariablesCheck(variables, entities, df)

    level_vars = df.columns.get_level_values(0)
    level_ents = df.columns.get_level_values(1)
    in_variables = level_vars.isin(variables)

    # Work on a copy so that neither the caller's frame nor a slice of it is
    # written to (the original assignment triggered SettingWithCopyWarning).
    result = df.copy()
    for entity in entities:
        selected = in_variables & (level_ents == entity)
        result[(name, entity)] = df.loc[:, selected].sum(axis=1, skipna=False)
    return result

Unnamed: 0_level_0,GDP,GDP,Inflation,Inflation,Inflation,StockPrices,StockPrices,StockPrices,StockPrices_def,StockPrices_def,StockPrices_def,StockPrices_def_ld1,StockPrices_def_ld1,StockPrices_def_ld1,GDP_ld1,GDP_ld1
Unnamed: 0_level_1,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN,DEU,FIN
1970-01-01,,63557250000.0,25.154467,31.6201,12.784633,11.314692,10.873145,1.247428,11.314692,10.873145,1.247428,,,,,


In [46]:
# Start from the model's current frame (after the deflate / de-trend steps).
df = do.ReturnDf()

# Sum the two variables for FIN only. Passing entities=None instead makes
# SumVariables use every entity for which both variables exist.
variables = ['GDP_ld1', 'StockPrices_def_ld1']
name = "gdp+stock"
entities = ['FIN']

df = SumVariables(df.copy(), variables, name=name, entities=entities)
df.tail(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,GDP,GDP,Inflation,Inflation,Inflation,StockPrices,StockPrices,StockPrices,StockPrices_def,StockPrices_def,StockPrices_def,StockPrices_def_ld1,StockPrices_def_ld1,StockPrices_def_ld1,GDP_ld1,GDP_ld1,gdp+stock
Unnamed: 0_level_1,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN,AUT,DEU,FIN,DEU,FIN,FIN
2018-10-01,2993535000000.0,204139000000.0,117.4735,111.655467,111.8385,130.35324,98.342199,108.417365,27.912391,27.849869,12.393552,-0.081448,-0.101685,-0.098282,0.004956,0.005274,-0.093009
2019-01-01,3009343000000.0,204883200000.0,116.7358,110.76445,111.57555,127.735845,97.972692,,27.524779,27.968417,,-0.013984,0.004248,,0.005267,0.003639,


## Plot

In [None]:
# One figure with a single axes, handed to the model's plotting helper.
fig, ax = plt.subplots(figsize=(15, 7))
do.PlotVariables(
    variables=['StockPrices_def_ld1', 'GDP_ld1', 'PC', 'GDP_ld1_wl5'],
    entities=['FIN'],
    ax=ax,
)