In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import sqlalchemy

from dtaidistance import dtw
from scipy import stats

from database_credentials import get_database_url
from load_transform import load_transform_combine, pivot

In [2]:
# Open the database connection used by the data-loading cell.
# The URL comes from the project-local `database_credentials` module.
database_url = get_database_url()
engine = sqlalchemy.create_engine(database_url)
# Kept open across cells; closed explicitly in the last cell of the notebook.
connection = engine.connect()

In [3]:
# Load the sell data over the open connection.
num_weeks = 7
sell_data = load_transform_combine(num_weeks, connection)
xticks = sell_data['weekday_observed'].unique()

# Pivot to a plain NumPy array and standardize it with a z-score.
# NOTE(review): axis=0 (scipy's default, spelled out here) scales each column
# across rows. The pairwise-distance step indexes rows as series (trends[x]),
# so confirm per-column scaling is intended; axis=1 would standardize each
# row on its own instead.
trends = stats.zscore(pivot(sell_data).to_numpy(), axis=0)

In [4]:
# Square matrix of pairwise DTW distances between the rows of `trends`.
# It starts as all zeros, and the diagonal is never written.
num_trends = trends.shape[0]
distances = np.zeros((num_trends, num_trends))

# Walk the strict lower triangle (row > col) so each unordered pair is
# computed once, then mirror the value into the upper triangle.
for row in range(1, num_trends):
    for col in range(row):
        pair_distance = dtw.distance(trends[row], trends[col])
        distances[row, col] = pair_distance
        distances[col, row] = pair_distance

In [5]:
# Preview the first rows of the pairwise distance matrix as a DataFrame.
distances_df = pd.DataFrame(data=distances)
distances_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66,67,68,69,70,71,72,73,74,75
0,0.0,3.356853,2.513905,4.566109,4.633916,1.000702,3.49547,5.732625,5.949554,3.441137,...,3.197254,4.785002,3.896489,3.231087,5.550442,3.450594,2.731154,5.535373,3.513145,3.262808
1,3.356853,0.0,3.437345,2.715725,3.578869,3.251078,2.693419,2.637865,3.047948,1.879151,...,1.750081,2.635673,2.022848,2.500811,2.715437,1.812808,2.16817,2.53289,1.786944,4.527527
2,2.513905,3.437345,0.0,3.622007,3.101882,2.200406,2.148594,5.519928,5.750358,3.248374,...,3.268377,4.246648,3.995758,2.279485,5.533342,3.665752,2.637972,5.41647,3.615905,1.756798
3,4.566109,2.715725,3.622007,0.0,1.711333,4.077764,1.403827,2.793399,3.028383,2.450294,...,3.264212,2.502579,3.574364,2.468882,2.980112,3.556919,3.371393,2.923814,3.422601,4.582446
4,4.633916,3.578869,3.101882,1.711333,0.0,4.13349,1.599163,4.279944,4.007529,2.39795,...,4.043285,4.590352,4.55883,1.763497,3.627344,4.482765,4.164355,4.188158,4.295713,3.809139


In [6]:
# Persist the distance matrix for later use.
# Create the output directory first so the save does not raise
# FileNotFoundError when 'data/' is missing (e.g. on a fresh checkout).
Path('data').mkdir(parents=True, exist_ok=True)
# np.save appends the '.npy' extension, so this writes data/distances.npy.
np.save('data/distances', distances)

In [7]:
# Release database resources: close the connection opened at the top of the
# notebook first, then dispose of the engine that created it.
connection.close()
engine.dispose()