In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from matplotlib import rcParams, cm

In [2]:
data = pd.read_csv('./data/event_transactions_CB.csv')
data.sample(10)

Unnamed: 0,event_id,attendee_id,order_id,purchase_quad,purchase_type,spend_cat,age_cat,gender_cat,attendee_frequency,attendee_zip,Latitude,Longitude
4586244,54,25047,17014,3,1,2,0,0,1,32708,28.683408,-81.28151
703530,24,22993,6249,3,1,1,0,0,1,30735,34.604648,-84.91926
3599206,47,336512,9418,3,2,2,3,1,1,15485,39.816608,-79.33009
2275585,37,339744,22020,2,2,1,3,1,0,15676,40.243195,-79.45737
4677830,54,35885,28992,2,1,1,0,0,2,53179,42.515668,-88.13454
642630,24,3374,1860,2,1,2,0,0,2,78588,26.720155,-98.46845
3962190,50,352554,33734,2,0,0,3,1,0,92650,33.640302,-117.769442
2782799,40,30226,6091,2,0,2,0,0,1,76883,29.956952,-100.227509
482221,22,34028,2531,3,2,0,0,0,1,18821,41.977241,-75.74391
4766400,54,339889,34872,2,2,2,3,0,0,8001,39.559426,-75.3631


In [3]:
data.shape

(4955031, 12)

In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [8]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [6]:
# shuffle the data
shuffle_idx = [x for x in range(0,data.shape[0])]
from random import shuffle
shuffle(shuffle_idx)
data = data.iloc[shuffle_idx]

In [7]:
data.head()

Unnamed: 0,event_id,attendee_id,order_id,purchase_quad,purchase_type,spend_cat,age_cat,gender_cat,attendee_frequency,attendee_zip,Latitude,Longitude
3516456,47,40523,18252,2,0,0,0,0,1,18074,40.316397,-75.5154
3117046,43,278438,5941,4,2,1,2,0,0,95687,38.34401,-121.95333
79512,11,6124,3893,3,1,2,0,1,2,17720,41.186545,-77.218368
2611600,39,218,70384,3,0,2,0,1,2,77039,29.909123,-95.33683
3176594,44,23828,5021,3,1,2,0,0,2,92618,33.659639,-117.73948


In [5]:
x = data[["attendee_id", "purchase_quad"]].values

# Normalize the targets between 0 and 1. Makes it easy to train.
y = data["purchase_type"].apply(lambda x: (x - data.purchase_type.min()) / (data.purchase_type.max() - data.purchase_type.min())).values # normalizes the target

# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * data.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

KeyboardInterrupt: 

In [None]:
EMBEDDING_SIZE = 50


class RecommenderNet(keras.Model):
    def __init__(self, num_attendee, num_type, embedding_size, **kwargs):
        super(RecommenderNet, self).__init__(**kwargs)
        self.num_attendee = num_attendee
        self.num_type = num_type
        self.embedding_size = embedding_size
        self.attendee_embedding = layers.Embedding(
            num_attendee,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.attendee_bias = layers.Embedding(num_attendee, 1)
        self.quad_embedding = layers.Embedding(
            num_type,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.quad_bias = layers.Embedding(num_type, 1)

    def call(self, inputs):
        attendee_vector = self.attendee_embedding(inputs[:, 0])
        attendee_bias = self.attendee_bias(inputs[:, 0])
        quad_vector = self.quad_embedding(inputs[:, 1])
        quad_bias = self.quad_bias(inputs[:, 1])
        dot_attendee_quad = tf.tensordot(attendee_vector, quad_vector, 2)
        # Add all the components (including bias)
        x = dot_attendee_quad + attendee_bias + quad_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)

In [None]:
data.columns

In [None]:
def value_counts_sorted(series):
    vc = series.value_counts().sort_values(ascending = False)
    n = len(vc)
    return n, vc.index, vc.values    

In [None]:
n, ids, vals = value_counts_sorted(data.event_id)

## Map Plot

In [None]:
from mpl_toolkits.basemap import Basemap

In [None]:
plt.figure(figsize=(16,10))

#colors
colors = cm.rainbow(np.linspace(0, 1, 10))

# numbers given for the continential united states
m = Basemap(projection='mill'
            , llcrnrlat = 20 # south -90 min
            , urcrnrlat = 50 # north 90 max
            , llcrnrlon = -130 # west -180 min
            , urcrnrlon = -60 # east 180 max
            , resolution = 'c'
           )

m.drawcoastlines()
m.drawmapboundary(fill_color='#edf7fe', linewidth=0)

m.fillcontinents(color='gray'
                 , alpha=0.25
                )
m.drawcountries()

events_lat_y = data['Latitude'].tolist()
events_lon_x = data['Longitude'].tolist()

m.scatter(events_lon_x
          , events_lat_y
          , latlon=True
          , c='red'
         )

m.plot(data['Longitude']
       , data['Latitude']
      )

plt.title('Map of Users as of December 2020', fontsize=30)

plt.savefig('./figures/map_users_Dec2020.png')
plt.show();