In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import rcParams, cm

plt.style.use('fivethirtyeight')

In [None]:
# Load the event transaction records from the local CSV into a DataFrame.
data = pd.read_csv('./data/event_transactions_CB.csv')
# Display 10 randomly chosen rows as a sanity check (no seed is passed, so the
# rows shown differ from run to run).
data.sample(10)

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
data.shape

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Shuffle the rows once, with a fixed seed for reproducibility, so that the
# positional 90/10 split below is a random split rather than a file-order one.
df = data.sample(frac=1, random_state=42)

# Model inputs: one (attendee_id, purchase_quad) pair per row. They are read
# from the SHUFFLED frame; reading from `data` would silently discard the
# shuffle while still looking correct.
# NOTE(review): if these pairs are fed to Embedding layers, both columns must
# already be integer indices in [0, input_dim) - confirm upstream encoding.
x = df[["attendee_id", "purchase_quad"]].values

# Normalize the targets between 0 and 1. Makes it easy to train.
# min/max are computed once and applied as a vectorised expression instead of
# being re-evaluated inside a per-row lambda.
purchase_type_min = df["purchase_type"].min()
purchase_type_max = df["purchase_type"].max()
y = (
    (df["purchase_type"] - purchase_type_min)
    / (purchase_type_max - purchase_type_min)
).values

# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

In [None]:
# Default latent-vector width; presumably passed as `embedding_size` when the
# model is instantiated (the instantiation is not in this part of the notebook).
EMBEDDING_SIZE = 50


class RecommenderNet(keras.Model):
    """Embedding-based scorer for (attendee, quad) index pairs.

    Each attendee index and each quad index is mapped to a learned vector of
    length ``embedding_size`` plus a learned scalar bias. The score of a pair
    is ``sigmoid(dot(attendee_vec, quad_vec) + attendee_bias + quad_bias)``.

    Parameters
    ----------
    num_attendee : int
        ``input_dim`` of the attendee embedding tables (number of distinct
        attendee indices the model can look up).
    num_type : int
        ``input_dim`` of the quad embedding tables.
    embedding_size : int
        Length of the attendee and quad latent vectors.
    **kwargs
        Forwarded unchanged to ``keras.Model.__init__``.
    """

    def __init__(self, num_attendee, num_type, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_attendee = num_attendee
        self.num_type = num_type
        self.embedding_size = embedding_size
        # Latent vectors, lightly L2-regularised.
        self.attendee_embedding = layers.Embedding(
            num_attendee,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        # One scalar bias per attendee index.
        self.attendee_bias = layers.Embedding(num_attendee, 1)
        self.quad_embedding = layers.Embedding(
            num_type,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        # One scalar bias per quad index.
        self.quad_bias = layers.Embedding(num_type, 1)

    def call(self, inputs):
        """Score a batch of pairs.

        ``inputs`` is indexed as ``inputs[:, 0]`` (attendee index) and
        ``inputs[:, 1]`` (quad index), i.e. one pair per row. Returns one
        sigmoid-squashed score per row, shaped ``(batch, 1)``.
        """
        attendee_ids = inputs[:, 0]
        quad_ids = inputs[:, 1]

        attendee_vector = self.attendee_embedding(attendee_ids)
        attendee_bias = self.attendee_bias(attendee_ids)
        quad_vector = self.quad_embedding(quad_ids)
        quad_bias = self.quad_bias(quad_ids)

        # Row-wise dot product, kept as (batch, 1) so it lines up with the
        # bias terms. tf.tensordot(a, b, 2) must NOT be used here: on two
        # (batch, embedding) tensors it contracts over both axes and returns a
        # single scalar summed across the whole batch, so every row would
        # share the same interaction term.
        dot_attendee_quad = tf.reduce_sum(
            attendee_vector * quad_vector, axis=1, keepdims=True
        )
        # Add all the components (including bias)
        x = dot_attendee_quad + attendee_bias + quad_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)

In [None]:
# List the column names available in the transactions frame.
data.columns

In [None]:
def value_counts_sorted(series):
    """Tally the values of ``series`` and order the tally by descending count.

    Parameters
    ----------
    series : pandas.Series
        Values to count.

    Returns
    -------
    tuple
        ``(n, index, counts)``: ``n`` is the number of entries in the tally,
        ``index`` holds the tallied values ordered from most to least
        frequent, and ``counts`` is the matching array of occurrence counts.
    """
    tally = series.value_counts()
    ordered = tally.sort_values(ascending=False)
    return len(ordered), ordered.index, ordered.values

In [None]:
# Count rows per event_id: n = number of distinct event ids counted,
# ids = those event ids ordered by descending count, vals = the matching counts.
n, ids, vals = value_counts_sorted(data.event_id)

## Map Plot

In [None]:
from mpl_toolkits.basemap import Basemap

In [None]:
plt.figure(figsize=(16, 10))

# Ten evenly spaced colours from the rainbow colormap. Not used in this cell;
# kept because later cells may read `colors`.
colors = cm.rainbow(np.linspace(0, 1, 10))

# Bounding box for the continental United States (Miller cylindrical projection).
m = Basemap(
    projection='mill',
    llcrnrlat=20,    # south edge (-90 min)
    urcrnrlat=50,    # north edge (90 max)
    llcrnrlon=-130,  # west edge (-180 min)
    urcrnrlon=-60,   # east edge (180 max)
    resolution='c',
)

# Base map: coastlines, pale-blue background, translucent grey land, borders.
m.drawcoastlines()
m.drawmapboundary(fill_color='#edf7fe', linewidth=0)
m.fillcontinents(color='gray', alpha=0.25)
m.drawcountries()

events_lat_y = data['Latitude'].tolist()
events_lon_x = data['Longitude'].tolist()

# One red marker per row; latlon=True tells Basemap the inputs are degrees
# that still need projecting into map coordinates.
m.scatter(events_lon_x, events_lat_y, latlon=True, c='red')

# NOTE(review): unlike the scatter call, this passes degrees WITHOUT
# latlon=True, so the values are treated as projection coordinates and the
# line is not drawn over the plotted points. Adding latlon=True would draw a
# line through every row in file order, which is probably not wanted either -
# confirm the intent, then either add latlon=True or delete this call.
m.plot(data['Longitude'], data['Latitude'])

plt.title('Map of Users as of December 2020', fontsize=30)

# Create the output folder if needed so savefig does not fail on a fresh checkout.
os.makedirs('./figures', exist_ok=True)
plt.savefig('./figures/map_users_Dec2020.png')
plt.show();