# Imports

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd

# Helper functions

In [2]:
def TimeTransformation(tm):
    """Express a time of day as fractional hours.

    Only ``tm.hour`` and ``tm.minute`` are read, so any seconds carried by
    ``tm`` do not contribute to the result.

    Args:
        tm: Any object exposing numeric ``hour`` and ``minute`` attributes.

    Returns:
        float: ``hour + minute / 60``; for example 14:30 gives 14.5.
    """
    whole_hours = float(tm.hour)
    minute_fraction = float(tm.minute) / 60.0
    return whole_hours + minute_fraction


def TimeTransformationList(tm_list):
    """Expand sales rows into one fractional-hour entry per unit sold.

    Args:
        tm_list: Object exposing ``tStamp`` and ``qty`` attributes that can be
            turned into lists (the notebook passes DataFrame slices with
            those two columns). Each ``qty`` value is used as a repeat count.

    Returns:
        list: For every row, the ``TimeTransformation`` of its timestamp
        repeated ``qty`` times, in row order.
    """
    stamps = list(tm_list.tStamp)
    counts = list(tm_list.qty)
    expanded = []
    for position, stamp in enumerate(stamps):
        # Convert once per row, then repeat the value for each unit sold.
        hour_of_day = TimeTransformation(stamp)
        expanded.extend(hour_of_day for _ in range(counts[position]))
    return expanded

# Create a dataframe with Pandas

In [3]:
# Location of the raw sales export, relative to this notebook's directory.
fl = "../../../data/ArmorOktoberfest.csv"
# index_col=False tells pandas not to promote the first CSV column to the
# index, so the frame gets a default integer index.
data = pd.read_csv(fl, index_col=False)

## What's inside this dataframe?

In [4]:
# Peek at the last five rows to see the columns: tStamp, bType, qty.
data.tail()

Unnamed: 0,tStamp,bType,qty
203,10/6/2018 23:34:26,Kolsch 12oz,2
204,10/6/2018 23:34:36,IPA 12oz,1
205,10/6/2018 23:37:57,Stout 12oz,2
206,10/6/2018 23:39:58,Pumpkin Chai Ale 12oz,1
207,10/6/2018 23:41:41,Stout 12oz,2


# Clean data
## Convert 'tStamp' to pandas datetime

In [5]:
# Before: 'tStamp' still holds the raw timestamp strings (dtype object).
data['tStamp']

0      9/20/2018 21:18:05
1      9/20/2018 21:18:13
2      9/20/2018 21:18:22
3      10/6/2018 13:11:24
4      10/6/2018 13:14:09
              ...        
203    10/6/2018 23:34:26
204    10/6/2018 23:34:36
205    10/6/2018 23:37:57
206    10/6/2018 23:39:58
207    10/6/2018 23:41:41
Name: tStamp, Length: 208, dtype: object

In [6]:
# Parse the timestamp strings into pandas datetimes.
# The column is replaced with plain assignment rather than
# `data.loc[:, 'tStamp'] = ...`: since pandas 2.0 the `.loc[:, col]` form
# first tries to write into the existing column in place, and an object
# column can hold Timestamps, so the dtype would stay `object` instead of
# becoming datetime64[ns].
data['tStamp'] = pd.to_datetime(data['tStamp'])

In [7]:
# After: 'tStamp' should now display as a datetime column.
data['tStamp']

0     2018-09-20 21:18:05
1     2018-09-20 21:18:13
2     2018-09-20 21:18:22
3     2018-10-06 13:11:24
4     2018-10-06 13:14:09
              ...        
203   2018-10-06 23:34:26
204   2018-10-06 23:34:36
205   2018-10-06 23:37:57
206   2018-10-06 23:39:58
207   2018-10-06 23:41:41
Name: tStamp, Length: 208, dtype: datetime64[ns]

## Get rid of everything that happened before 14:00 on 06OCT2018

In [8]:
# Before: number of rows prior to the date filter.
len(data)

208

In [9]:
# Keep only rows stamped strictly after 13:59:59 on 6 Oct 2018; earlier rows
# (including the September entries) are dropped.
up_to_date = pd.to_datetime("2018/10/6 13:59:59")
mask = data.tStamp > up_to_date
# .copy() makes `data` an independent frame rather than a selection of the
# unfiltered one, so the later in-place `data.loc[...] = ...` clean-up is
# unambiguous and does not raise SettingWithCopyWarning.
data = data.loc[mask, :].copy()

In [10]:
# After: number of rows that survived the date filter.
len(data)

200

## Clean up "bType"

In [None]:
# Before: count rows whose bType is exactly "Blonde" (no size suffix).
len(data[data['bType'] == "Blonde"])

In [None]:
# Relabel rows whose bType is exactly "Blonde" as "Blonde 12oz".
data.loc[data.bType == "Blonde", "bType"] = "Blonde 12oz"

In [None]:
# After: no row should be labelled exactly "Blonde" any more, so this is 0.
len(data[data['bType'] == "Blonde"])

# Compile graph info
## List of beers

In [None]:
# Distinct beer labels in order of first appearance. `list(set(...))` gave a
# different order on every kernel start (string hashing is randomised);
# Series.unique() is deterministic.
b_type_vals = data.bType.unique().tolist()

## Make list of all sales

In [None]:
# One entry per beer sold, valued at the sale's time of day in fractional
# hours (a row with qty == 3 contributes three identical entries).
tStamp_n_qty = data.loc[:, ["tStamp", "qty"]]

# Reuse the helper defined at the top of the notebook instead of keeping a
# second hand-written copy of the same expansion loop.
all_sales = TimeTransformationList(tStamp_n_qty)

len_all_sales = len(all_sales)

### What's in "all_sales"?
#### Each beer sold adds one element: if 3 beers were sold at 2:30pm, then there would be 3 elements in "all_sales" with the value 14.5

In [None]:
# Show a short preview plus the total length instead of dumping every entry.
all_sales[:10], len(all_sales)

## Make a dictionary

In [None]:
# Short label -> substring that identifies that beer style inside `bType`.
type_patterns = {
    'blonde': "Blonde",
    'chai': "Chai",
    'stout': "Stout",
    'kolsch': "Kolsch",
    'ipa': "IPA",
    'pale': "Pale",
}

# For every style, select the matching rows and expand them into one
# fractional-hour entry per unit sold. Built in a single pass so the dict
# never holds DataFrames first and lists later.
# na=False makes rows with a missing bType count as "no match" rather than
# putting NaN into the boolean mask.
sales_by_type = {
    label: TimeTransformationList(
        data.loc[data["bType"].str.contains(pattern, na=False),
                 ['tStamp', 'qty']]
    )
    for label, pattern in type_patterns.items()
}

# Parallel lists in dict order, consumed by the plotting cells below.
sales_by_type_values = list(sales_by_type.values())
sales_by_type_keys = list(sales_by_type.keys())
sales_by_type_sizes = [len(ss) for ss in sales_by_type.values()]

# Graphs and histograms
## Histogram of "all_sales"

In [None]:
output_dir = './06OCT2018ArmorOktoberfestOUT/'
# savefig does not create missing folders; make sure the output directory
# exists so this cell (and the later plotting cells) work on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

str_4_1 = "There was a total of {} sales in {} hours."

# 10 is the width in hours of the plotted window, range=(14, 24).
print(str_4_1.format(len_all_sales, 10))
print()

fig = plt.figure()
# 20 bins across a 10-hour window -> one bin per 30 minutes.
plt.hist(all_sales, bins=20, range=(14, 24))

plt.title("Frequency of sales in intervals of 30min")

plt.xlabel('Hour')
plt.ylabel("Sale frequency")

plt.grid()
# Pad the x axis by a quarter hour on each side of the histogram window.
plt.xlim(13.75, 24.25)
# NOTE(review): fixed upper limit; a bin taller than 26.5 would be clipped.
plt.ylim(0, 26.5)

fig.savefig(output_dir + 'TotalSales30Min.pdf')

## Stacked histogram of "all_sales" by type

In [None]:
# Report how many units were sold of each style.
for style_name, style_hours in sales_by_type.items():
    print("There were {} sales of {}.".format(len(style_hours), style_name))
print()

# Same 30-minute bins as the total histogram, with the styles stacked.
fig, ax = plt.subplots()
ax.hist(
    sales_by_type_values,
    bins=20,
    range=(14, 24),
    histtype='barstacked',
    label=sales_by_type_keys,
)

ax.set_title("Frequency of sales in intervals of 30min")
ax.set_xlabel('Hora')
ax.set_ylabel('Frecuencia de venta por estilo')

ax.grid()
ax.set_xlim(13.75, 24.25)
ax.set_ylim(0, 26.5)
ax.legend()

fig.savefig(output_dir + 'TotalSales30MinStacked.pdf')

## Pie chart of sales by type

In [None]:
# Print each style's share of all units sold, to two decimals.
# NOTE(review): these shares are relative to len(all_sales), while the pie
# normalises by the sum of its wedges; the two agree only if every sale
# matched exactly one style - confirm.
for style_name, style_hours in sales_by_type.items():
    share = len(style_hours) / len(all_sales) * 100.0
    prntg = "{0:.2f}".format(share)
    print("{}% of all sales were for type {}.".format(prntg, style_name))
print()

# Each wedge is pushed out a little further than the one before it.
wedge_offsets = [0.05 * position for position in range(len(sales_by_type))]
wedge_labels = [name.capitalize() for name in sales_by_type_keys]

fig, ax = plt.subplots()
ax.pie(
    sales_by_type_sizes,
    explode=wedge_offsets,
    labels=wedge_labels,
    shadow=True,
    autopct='%1.1f%%',
)
ax.set_title('Total sales by type')
fig.savefig(output_dir + 'TotalSalesPieChart.pdf')


## All sales cumulative

In [None]:
# Running total of units sold over the 14:00-24:00 window, in 30-minute bins.
fig, ax = plt.subplots()
ax.hist(all_sales, bins=20, range=(14, 24), cumulative=True)

ax.set_title('Cumulative frequency of all sales')
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')

ax.grid()
fig.savefig(output_dir + 'TotalSalesCumulative.pdf')

## All sales cumulative by type

In [None]:
# TODO: the side-by-side cumulative bars are hard to read.
# NOTE(review): histtype='step' (one outline per style) might be clearer - try it.
fig, ax = plt.subplots()
ax.hist(
    sales_by_type_values,
    bins=20,
    range=(14, 24),
    cumulative=True,
    label=sales_by_type_keys,
)

ax.set_title('Total cumulative sales by type')
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')

ax.legend()
ax.grid()
fig.savefig(output_dir + 'TotalSalesCumulativeByType.pdf')

## All sales cumulative stacked

In [None]:
# Cumulative units sold per style, stacked so the top edge is the overall total.
fig, ax = plt.subplots()
ax.hist(
    sales_by_type_values,
    bins=20,
    range=(14, 24),
    cumulative=True,
    stacked=True,
    label=sales_by_type_keys,
)

ax.set_title('Frecuencia cumulativa de todas las ventas encimado')
ax.set_xlabel('Hora')
ax.set_ylabel('Frecuencia')

ax.legend()
ax.grid()
fig.savefig(output_dir + 'TotalSalesCumulativeStacked.pdf')