# Exploratory Data Analysis of Markov Simulation Project - Weekly data

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the cleaned weekly records: the first CSV column becomes the index and
# the "timestamp" column is parsed into datetimes.
df = pd.read_csv(
    "../data/weekly/data_clean.csv",
    index_col=0,
    parse_dates=["timestamp"],
)
df.tail()

In [None]:
def map_day(number):
    """Map a customer number to the weekday encoded in its leading digit.

    The first character of ``str(number)`` selects the day:
    1 -> Monday, 2 -> Tuesday, 3 -> Wednesday, 4 -> Thursday, 5 -> Friday.

    Args:
        number: Customer identifier; any value ``str()`` can render.

    Returns:
        The weekday name, or "Unknown" when the leading character is not one
        of the digits 1-5. This includes non-digit leads (the "-" of a
        negative number, the "n" of "nan") and empty strings.
    """
    weekdays = {
        "1": "Monday",
        "2": "Tuesday",
        "3": "Wednesday",
        "4": "Thursday",
        "5": "Friday",
    }
    # Compare the leading character directly rather than converting it with
    # int(): missing or malformed identifiers then fall through to "Unknown"
    # instead of raising ValueError / IndexError in the middle of a .map().
    return weekdays.get(str(number)[:1], "Unknown")


# Derive each row's weekday label from its customer number via map_day
df["week_day"] = df["customer_no"].apply(map_day)

In [None]:
# Show the frame, which now includes the week_day column
df

In [None]:
# Split the timestamp into hour-of-day and minute columns; the conversion is
# done once and its .dt accessor is reused for both parts.
_timestamp_parts = pd.to_datetime(df["timestamp"]).dt
df["hour"] = _timestamp_parts.hour
df["minute"] = _timestamp_parts.minute
df

In [None]:
# Distinct customers seen in each section on each weekday
df_customer_section = (
    df.groupby(by=["week_day", "location"])["customer_no"]
    .nunique()
)
df_customer_section

In [None]:
# Distinct customers per (hour, section) over the whole data set, with the
# group keys turned back into ordinary columns.
grouped_data_all = (
    df.groupby(by=["hour", "location"])["customer_no"]
    .nunique()
    .reset_index()
)
grouped_data_all

## Total number of customers in each section over time during the week

In [None]:
# Line chart of distinct customers per hour, one line per section.
sns.set_theme(style="white")
plt.figure(figsize=(10, 5))
ax = sns.lineplot(data=grouped_data_all, x="hour", y="customer_no", hue="location")
ax.set_xlabel("Time in Hours")
ax.set_ylabel("Frequency of Customers")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0)

## The number of customers in each section over time on each weekday.

In [None]:
# Distinct customers per (hour, weekday, section), with the group keys turned
# back into ordinary columns.
grouped_data_weekly = (
    df.groupby(by=["hour", "week_day", "location"])["customer_no"]
    .nunique()
    .reset_index()
)
grouped_data_weekly

In [None]:
# Weekday labels in calendar order (Monday through Friday)
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]

In [None]:
# One line chart per weekday: distinct customers per hour, one line per section.
sns.set_theme(style="white")
g = sns.FacetGrid(
    grouped_data_weekly,
    col="week_day",
    # Lay the facets out Monday..Friday; without col_order seaborn uses the
    # order in which the labels occur in the data. Labels that are not in
    # weekday_order (e.g. "Unknown") get no facet.
    col_order=weekday_order,
    # Declaring hue on the grid keeps one colour per section across all facets
    # and lets add_legend() build the legend from the grid's own hue levels.
    hue="location",
    col_wrap=3,
    height=4,
    aspect=1.2,
)
g.map(sns.lineplot, "hour", "customer_no")
g.set_axis_labels("Time in Hours", "Frequency of Customers")
g.add_legend(title="Location")
plt.tight_layout()
plt.show()

## Transition probabilities for each weekday

### Monday transition probabilities

In [None]:
# Keep only Monday's records; copy so that columns added to df_mon later do
# not touch df.
df_mon = df.loc[df["week_day"] == "Monday"].copy()
df_mon.tail()

In [None]:
# For every record, look up the same customer's following location; a
# customer's last record has no successor and gets NaN.
# NOTE(review): relies on the rows already being in chronological order per
# customer - confirm for data_clean.csv.
df_mon["loc_next"] = (
    df_mon.groupby("customer_no")["location"]
    .shift(periods=-1)
)
df_mon

In [None]:
# Monday transition matrix: row = current section, column = next section,
# each row normalised to sum to 1.
P_mon = pd.crosstab(
    index=df_mon["location"],
    columns=df_mon["loc_next"],
    normalize="index",
)

# Add checkout as an absorbing state (checkout -> checkout with probability 1).
# Presumably checkout never has a successor in the data, so the crosstab has
# no row for it.
checkout = pd.DataFrame(
    {"checkout": [1.0], "dairy": [0.0], "drinks": [0.0], "fruit": [0.0], "spices": [0.0]},
    index=["checkout"],
)
# concat aligns on column labels: a section that was never a destination on
# this day is missing from the crosstab and would come out as NaN for every
# non-checkout row. Such transitions have probability 0, so fill them.
P_mon = pd.concat([P_mon, checkout]).fillna(0.0)

# Add entrance column (set to zero: the matrix allows no move into the entrance)
P_mon["entrance"] = 0.0

# Fix the column order expected in the exported file.
P_mon = P_mon[["entrance", "dairy", "drinks", "fruit", "spices", "checkout"]]
P_mon.to_csv("../data/weekly/monday_tp.csv")

### Tuesday transition probabilities

In [None]:
# Keep only Tuesday's records; copy so that columns added to df_tu later do
# not touch df.
df_tu = df.loc[df["week_day"] == "Tuesday"].copy()

In [None]:
# For every record, look up the same customer's following location; a
# customer's last record has no successor and gets NaN.
# NOTE(review): relies on the rows already being in chronological order per
# customer - confirm for data_clean.csv.
df_tu["loc_next"] = (
    df_tu.groupby("customer_no")["location"]
    .shift(periods=-1)
)

In [None]:
# Tuesday transition matrix: row = current section, column = next section,
# each row normalised to sum to 1.
P_tu = pd.crosstab(
    index=df_tu["location"],
    columns=df_tu["loc_next"],
    normalize="index",
)

# Add checkout as an absorbing state (checkout -> checkout with probability 1).
# Presumably checkout never has a successor in the data, so the crosstab has
# no row for it.
checkout = pd.DataFrame(
    {"checkout": [1.0], "dairy": [0.0], "drinks": [0.0], "fruit": [0.0], "spices": [0.0]},
    index=["checkout"],
)
# concat aligns on column labels: a section that was never a destination on
# this day is missing from the crosstab and would come out as NaN for every
# non-checkout row. Such transitions have probability 0, so fill them.
P_tu = pd.concat([P_tu, checkout]).fillna(0.0)

# Add entrance column (set to zero: the matrix allows no move into the entrance)
P_tu["entrance"] = 0.0

# Fix the column order expected in the exported file.
P_tu = P_tu[["entrance", "dairy", "drinks", "fruit", "spices", "checkout"]]
P_tu.to_csv("../data/weekly/tuesday_tp.csv")

### Wednesday transition probabilities

In [None]:
# Keep only Wednesday's records; copy so that columns added to df_wed later do
# not touch df.
df_wed = df.loc[df["week_day"] == "Wednesday"].copy()
df_wed.tail()

In [None]:
# For every record, look up the same customer's following location; a
# customer's last record has no successor and gets NaN.
# NOTE(review): relies on the rows already being in chronological order per
# customer - confirm for data_clean.csv.
df_wed["loc_next"] = (
    df_wed.groupby("customer_no")["location"]
    .shift(periods=-1)
)
df_wed

In [None]:
# Wednesday transition matrix: row = current section, column = next section,
# each row normalised to sum to 1.
P_wed = pd.crosstab(
    index=df_wed["location"],
    columns=df_wed["loc_next"],
    normalize="index",
)

# Add checkout as an absorbing state (checkout -> checkout with probability 1).
# Presumably checkout never has a successor in the data, so the crosstab has
# no row for it.
checkout = pd.DataFrame(
    {"checkout": [1.0], "dairy": [0.0], "drinks": [0.0], "fruit": [0.0], "spices": [0.0]},
    index=["checkout"],
)
# concat aligns on column labels: a section that was never a destination on
# this day is missing from the crosstab and would come out as NaN for every
# non-checkout row. Such transitions have probability 0, so fill them.
P_wed = pd.concat([P_wed, checkout]).fillna(0.0)

# Add entrance column (set to zero: the matrix allows no move into the entrance)
P_wed["entrance"] = 0.0

# Fix the column order expected in the exported file.
P_wed = P_wed[["entrance", "dairy", "drinks", "fruit", "spices", "checkout"]]
P_wed.to_csv("../data/weekly/wednesday_tp.csv")

### Thursday transition probabilities

In [None]:
# Keep only Thursday's records; copy so that columns added to df_th later do
# not touch df.
df_th = df.loc[df["week_day"] == "Thursday"].copy()
df_th.tail()

In [None]:
# For every record, look up the same customer's following location; a
# customer's last record has no successor and gets NaN.
# NOTE(review): relies on the rows already being in chronological order per
# customer - confirm for data_clean.csv.
df_th["loc_next"] = (
    df_th.groupby("customer_no")["location"]
    .shift(periods=-1)
)
df_th

In [None]:
# Thursday transition matrix: row = current section, column = next section,
# each row normalised to sum to 1.
P_th = pd.crosstab(
    index=df_th["location"],
    columns=df_th["loc_next"],
    normalize="index",
)

# Add checkout as an absorbing state (checkout -> checkout with probability 1).
# Presumably checkout never has a successor in the data, so the crosstab has
# no row for it.
checkout = pd.DataFrame(
    {"checkout": [1.0], "dairy": [0.0], "drinks": [0.0], "fruit": [0.0], "spices": [0.0]},
    index=["checkout"],
)
# concat aligns on column labels: a section that was never a destination on
# this day is missing from the crosstab and would come out as NaN for every
# non-checkout row. Such transitions have probability 0, so fill them.
P_th = pd.concat([P_th, checkout]).fillna(0.0)

# Add entrance column (set to zero: the matrix allows no move into the entrance)
P_th["entrance"] = 0.0

# Fix the column order expected in the exported file.
P_th = P_th[["entrance", "dairy", "drinks", "fruit", "spices", "checkout"]]
P_th.to_csv("../data/weekly/thursday_tp.csv")

### Friday transition probabilities

In [None]:
# Keep only Friday's records; copy so that columns added to df_fr later do
# not touch df.
df_fr = df.loc[df["week_day"] == "Friday"].copy()
df_fr.tail()

In [None]:
# For every record, look up the same customer's following location; a
# customer's last record has no successor and gets NaN.
# NOTE(review): relies on the rows already being in chronological order per
# customer - confirm for data_clean.csv.
df_fr["loc_next"] = (
    df_fr.groupby("customer_no")["location"]
    .shift(periods=-1)
)
df_fr

In [None]:
# Friday transition matrix: row = current section, column = next section,
# each row normalised to sum to 1.
P_fr = pd.crosstab(
    index=df_fr["location"],
    columns=df_fr["loc_next"],
    normalize="index",
)

# Add checkout as an absorbing state (checkout -> checkout with probability 1).
# Presumably checkout never has a successor in the data, so the crosstab has
# no row for it.
checkout = pd.DataFrame(
    {"checkout": [1.0], "dairy": [0.0], "drinks": [0.0], "fruit": [0.0], "spices": [0.0]},
    index=["checkout"],
)
# concat aligns on column labels: a section that was never a destination on
# this day is missing from the crosstab and would come out as NaN for every
# non-checkout row. Such transitions have probability 0, so fill them.
P_fr = pd.concat([P_fr, checkout]).fillna(0.0)

# Add entrance column (set to zero: the matrix allows no move into the entrance)
P_fr["entrance"] = 0.0

# Fix the column order expected in the exported file.
P_fr = P_fr[["entrance", "dairy", "drinks", "fruit", "spices", "checkout"]]
P_fr.to_csv("../data/weekly/friday_tp.csv")