## This code compares the actions of concepts users with non-concepts users.

In [None]:
import os, squarify, pandas as pd, matplotlib.pyplot as plt

from weco_datascience.reporting import get_recent_data

In [None]:
# Load the raw events that every later cell filters and summarises.
# NOTE(review): presumably this returns the 100000 most recent records from the
# "metrics-conversion-prod" index as a DataFrame, using connection settings
# read from the environment — confirm against weco_datascience.reporting.
df = get_recent_data(config=os.environ, n=100000, index="metrics-conversion-prod")

### [Definitions of actions](https://app.gitbook.com/o/-LumfFcEMKx4gYXKAZTQ/s/MM8zK9GZyFCpy8kvd5Km/analytics-and-reporting/kibana-definitions) are translated into code below

In [None]:
# Each frame below holds the rows of `df` that match one action definition,
# tagged with an "action" label. `.assign` returns a new frame, so the label is
# never written onto a selection taken from `df` (the pattern that triggers
# pandas' SettingWithCopyWarning).

# Any event on a concept page.
concepts = df.loc[df["page.name"] == "concept"].assign(action="concepts")

# Works search: the works page with a search query present.
search = df.loc[
    (df["page.name"] == "works") & (df["page.query.query"].notnull())
].assign(action="search")

# A single work page, identified by a work ID.
works = df.loc[
    (df["page.name"] == "work") & (df["page.query.workId"].notnull())
].assign(action="works")

# Item page events, excluding downloads and viewer scroll events.
item_view = df.loc[
    (df["page.name"] == "item")
    & (df["properties.event"] != "download")
    & (df["source"] != "viewer/scroll")
].assign(action="item_view")  # label was previously misspelled "item_vew"

# Page views of an individual image page.
image_view = df.loc[
    (df["page.name"] == "image") & (df["type"] == "pageview")
].assign(action="image_view")

# Image modal opened from an images search that has a query.
image_modal = df.loc[
    (df["page.name"] == "images")
    & (df["properties.event"] == "Open image modal")
    & (df["page.query.query"].notnull())
].assign(action="image_modal")

# Any event on an exhibition page.
exhibition = df.loc[df["page.name"] == "exhibition"].assign(action="exhibition")

# Any event on a story page.
story = df.loc[df["page.name"] == "story"].assign(action="story")

In [None]:
# Stack all of the labelled action frames into a single table of events.
selected = [
    concepts,
    search,
    works,
    item_view,
    image_view,
    image_modal,
    exhibition,
    story,
]
df2 = pd.concat(objs=selected)

In [None]:
# Keep only the columns that the behaviour comparison reads.
kept_columns = [
    "@timestamp",
    "anonymousId",
    "page.query.query",
    "page.query.workId",
    "page.name",
    "properties.event",
    "action",
]
df3 = df2[kept_columns]

In [None]:
# Print just the timestamp of the last row rather than a one-row Series, whose
# repr also carries the index label, the column name and the dtype.
# NOTE(review): this treats the last row as the start of the range, i.e. it
# assumes `df` is ordered newest-first — confirm.
print("Data has been taken from", df["@timestamp"].iloc[-1])

In [None]:
# Print just the timestamp of the first row rather than a one-row Series, whose
# repr also carries the index label, the column name and the dtype.
# NOTE(review): this treats the first row as the end of the range, i.e. it
# assumes `df` is ordered newest-first — confirm.
print("to", df["@timestamp"].iloc[0])

### Capturing concepts user behaviour

In [None]:
# Distinct anonymousId values that appear on at least one concept-page event.
concept_event_ids = concepts["anonymousId"]
concepts_users = concept_event_ids.drop_duplicates()

# Plain-list form of the same IDs, used for the membership tests further down.
concepts_ids = concepts_users.values.tolist()

In [None]:
# All labelled events whose anonymousId is one of the concepts users' IDs.
by_concepts_user = df3["anonymousId"].isin(concepts_ids)
concepts_behaviour = df3[by_concepts_user]

# Only the action label of each of those events.
concepts_behaviour2 = concepts_behaviour["action"]

### Summarising concepts user behaviour by action

In [None]:
# Count how often each action occurs among concepts users.
x = concepts_behaviour2.value_counts().to_frame()

# Name the counts column "count" and copy the action labels (the index of the
# counts) into an ordinary "action" column for plotting.
# NOTE(review): on pandas >= 2.0 value_counts() is expected to name the counts
# column "count" already, making the rename a no-op there — confirm against the
# installed version.
y = x.rename(columns={"action": "count"}).assign(
    action=lambda counts: counts.index
)

### Capturing non-concepts user behaviour

In [None]:
# All labelled events whose anonymousId is NOT one of the concepts users' IDs.
seen_on_concepts = df3["anonymousId"].isin(concepts_ids)
non_concepts_behaviour = df3[~seen_on_concepts]

# Only the action label of each of those events.
non_concepts_behaviour2 = non_concepts_behaviour["action"]

### Summarising non-concepts user behaviour by action

In [None]:
# Count how often each action occurs among non-concepts users.
a = non_concepts_behaviour2.value_counts().to_frame()

# Name the counts column "count" and copy the action labels (the index of the
# counts) into an ordinary "action" column for plotting.
# NOTE(review): on pandas >= 2.0 value_counts() is expected to name the counts
# column "count" already, making the rename a no-op there — confirm against the
# installed version.
b = a.rename(columns={"action": "count"}).assign(
    action=lambda counts: counts.index
)

### Here's what user behaviour looks like: concepts users vs non-concepts users

In [None]:
# Report the number of distinct users. len(concepts_behaviour) would count
# event rows (one per action), which does not match the "users" wording.
concepts_user_count = concepts_behaviour["anonymousId"].nunique()
print("Tree map of user actions for", concepts_user_count, "Concepts users")

# One rectangle per action, sized by how often concepts users performed it.
squarify.plot(sizes=y["count"], label=y["action"], alpha=0.8)
plt.axis("off")
plt.show()

In [None]:
# Report the number of distinct users. len(non_concepts_behaviour) would count
# event rows (one per action), which does not match the "users" wording.
non_concepts_user_count = non_concepts_behaviour["anonymousId"].nunique()
print("Tree map of user actions for", non_concepts_user_count, "non-Concepts users")

# One rectangle per action, sized by how often non-concepts users performed it.
squarify.plot(sizes=b["count"], label=b["action"], alpha=0.8)
plt.axis("off")
plt.show()