## Analysis of Days of Week based on Fremont Bicycle Data
Treating the hourly crossing counts within each day as features, to learn about the relationships between different days

In [None]:
%matplotlib inline
import os
from urllib.request import urlretrieve

import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture

## Get Data

In [None]:
FILENAME = '../data/Fremont_Bridge_Hourly_Bicycle_Counts_by_Month_October_2012_to_present.csv'
# No download location is configured; set this (or pass `url=`) to enable downloading.
URL = ''


def get_fremont_data(filename=FILENAME, url=URL, force_download=False):
    """Load the Fremont bicycle counts from a local CSV, downloading it if needed.

    Parameters
    ----------
    filename : str
        Path of the local CSV file. It must contain a 'Date' column plus
        exactly two count columns.
    url : str
        Location to download the CSV from when it is missing locally or when
        `force_download` is set.
    force_download : bool
        If True, download the file again even if `filename` already exists.

    Returns
    -------
    pandas.DataFrame
        Indexed by the parsed 'Date' column, with the two count columns
        renamed to 'West' and 'East' and a 'Total' column holding their sum.

    Raises
    ------
    ValueError
        If a download is required but `url` is empty.
    """
    if force_download or not os.path.exists(filename):
        if not url:
            # Fail with an actionable message instead of an obscure urllib error.
            raise ValueError(
                "Cannot download data for %r: no URL configured. "
                "Pass `url=` or place the CSV at that path." % (filename,)
            )
        # Make sure the destination directory exists before writing into it.
        target_dir = os.path.dirname(filename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        urlretrieve(url, filename)
    data = pd.read_csv(filename, index_col='Date', parse_dates=True)
    # Positional rename: the CSV is expected to have exactly two data columns.
    data.columns = ['West', 'East']
    data['Total'] = data['West'] + data['East']
    return data


In [None]:
# Load the counts using the default FILENAME / URL settings.
data = get_fremont_data()
# Preview the first rows as the cell output.
data.head()

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and later
# dropped the old names, so pick whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# Weekly totals for every column.
data.resample('W').sum().plot()

In [None]:
# Daily totals summed over a trailing 365-row window, then plotted.
ax = (
    data
    .resample('D').sum()
    .rolling(365).sum()
    .plot()
)
# Anchor the y-axis at zero; leave the upper limit automatic.
ax.set_ylim(bottom=0, top=None)

In [None]:
# Mean of every column for each time-of-day value in the index.
(
    data
    .groupby(data.index.time)
    .mean()
    .plot()
)

In [None]:
# Rows: time of day; columns: calendar date; cells: 'Total' counts.
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# Show the top-left 5x5 corner as a sanity check.
pivoted.iloc[0:5, 0:5]

In [None]:
# One line per date column; the very low alpha lets overlapping lines show density.
pivoted.plot(legend=False, alpha=0.01)

## Principal Component Analysis

In [None]:
# One row per date, one column per time of day; missing entries become 0.
X = pivoted.T.fillna(0).values
X.shape

In [None]:
# Project each row of X onto its first two principal components.
pca = PCA(n_components=2, svd_solver='full')
X2 = pca.fit_transform(X)

In [None]:
# Check the shape of the 2-component projection.
X2.shape

In [None]:
# X2.T unpacks into the two component columns (x, y).
plt.scatter(*X2.T)

## Unsupervised Clustering

In [None]:
# Fit a two-component Gaussian mixture on the full feature matrix.
# A fixed random_state makes the fit repeatable, so the 0/1 label numbering
# that later cells rely on (cluster titles, `labels == 1`) does not change
# from one run to the next.
qmm = GaussianMixture(2, random_state=0)
qmm.fit(X)
labels = qmm.predict(X)
labels

In [None]:
# Same PCA scatter, coloured by the mixture-model label of each row.
plt.scatter(*X2.T, c=labels, cmap='rainbow')
plt.colorbar()

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Panel i shows the date columns whose label equals i.
for cluster_id, panel in enumerate(ax):
    pivoted.loc[:, labels == cluster_id].plot(legend=False, alpha=0.1, ax=panel)

# Titles follow the colours labels 0 and 1 receive in the 'rainbow' scatter plot.
ax[0].set_title('Purple Cluster')
ax[1].set_title('Red Cluster')

## Comparing with Day of Week

In [None]:
# Integer day of week for each date column of `pivoted`.
dayofweek = pd.DatetimeIndex(pivoted.columns).dayofweek
# Colour the PCA scatter by day of week instead of by cluster label.
plt.scatter(*X2.T, c=dayofweek, cmap='rainbow')
plt.colorbar()

## Analyzing Outliers
The following points are weekdays with a holiday-like pattern

In [None]:
dates = pd.DatetimeIndex(pivoted.columns)
# Dates with dayofweek < 5 that were nevertheless assigned label 1.
weekday_in_cluster_1 = (dayofweek < 5) & (labels == 1)
dates[weekday_in_cluster_1]


What's up with Feb 6, 2017? [Snow Storm](https://www.seattletimes.com/seattle-news/weather/weather-service-predicts-3-to-6-inches-of-snow-in-seattle-area/)