### How useful do people find concepts? What do they do after they land?

In [None]:
import os, pandas as pd
from weco_datascience.reporting import get_recent_data

In [None]:
# Load recent records from the "metrics-conversion-prod" index (n=100000 is the
# size argument passed to get_recent_data).
df = get_recent_data(config=os.environ, n=100000, index="metrics-conversion-prod")
# Drop cache-busting requests. The mask is built before fillna(""), so
# na=False is needed: a missing page.path would otherwise put NaN in the mask
# and make the `~` negation fail. Rows with no path are kept and blank-filled.
is_cachebust = df["page.path"].str.contains("cachebust", na=False)
df = df.loc[~is_cachebust].fillna("")

### Date parameters

In [None]:
# Timestamp of the last row in df (shown as a one-element Series).
print("Data has been taken from", df["@timestamp"].tail(1))

In [None]:
# Timestamp of the first row in df (shown as a one-element Series).
print("to", df["@timestamp"].head(1))

In [None]:
# Columns carried forward from the raw events.
event_columns = [
    "@timestamp",
    "anonymousId",
    "page.name",
    "page.path",
    "page.query.workId",
    "page.query.query",
    "page.query.conceptId",
    "page.query.id",
    "source",
    "type",
    "properties.imageId",
    "properties.event",
]
df2 = df[event_columns]

stripped_path = df["page.path"].str.strip()
stripped_source = df["source"].str.strip()

# Helper columns: path prefixes (first 10 / first 7 characters) and the last
# 8 characters of source and path. Each one is inserted at position 0, so the
# final column order is the reverse of this list.
for helper_name, helper_values in (
    ("check10", stripped_path.str[:10]),
    ("check7", stripped_path.str[:7]),
    ("sourcechop", stripped_source.str[-8:]),
    ("pathchop", stripped_path.str[-8:]),
):
    df2.insert(0, helper_name, helper_values)

### Selecting actions of users who view Concepts pages

In [None]:
# Distinct anonymousIds that have at least one "concept" page event.
viewed_concept = df["page.name"] == "concept"
concepts_users = df2.loc[viewed_concept, ["anonymousId"]].drop_duplicates()
concepts_ids = concepts_users["anonymousId"].astype(str).values.tolist()

# Every event belonging to those users, ordered per user by timestamp.
by_concept_user = df["anonymousId"].isin(concepts_ids)
concepts_behaviour = df2.loc[by_concept_user]
# NOTE(review): this name shadows the builtin sorted(); it is kept because the
# cells below read it.
sorted = concepts_behaviour.sort_values(["anonymousId", "@timestamp"]).reset_index()

### Identifying actions  

In [None]:
# Concept page events whose path starts with "/concepts/".
is_concept_view = (sorted["page.name"] == "concept") & (sorted["check10"] == "/concepts/")
# .copy() makes this an independent frame: a "cypher_code" column is assigned
# to it further down, which would raise SettingWithCopyWarning on a bare .loc slice.
concepts = sorted.loc[is_concept_view].copy()
concepts.insert(0, "action", "concepts")

In [None]:
# Work page events (path starts with "/works/") whose source contains "/concepts/".
is_work_page = (sorted["page.name"] == "work") & (sorted["check7"] == "/works/")
# Missing values were replaced with "" when the data was loaded, so notnull()
# alone is always True; an empty string is what marks a missing workId.
has_work_id = sorted["page.query.workId"].fillna("").ne("")
from_concept = sorted["source"].str.contains("/concepts/", na=False)
# .copy() so the later "cypher_code" assignment does not raise SettingWithCopyWarning.
works = sorted.loc[is_work_page & has_work_id & from_concept].copy()
works.insert(0, "action", "works")

In [None]:
# Item page events under "/works/" whose path does not contain "canvas" and
# whose source contains "work_/works/".
is_item_page = (sorted["page.name"] == "item") & (sorted["check7"] == "/works/")
not_canvas = ~sorted["page.path"].str.contains("canvas", na=False)
from_work_page = sorted["source"].str.contains("work_/works/", na=False)
# .copy() so the later "cypher_code" assignments do not raise SettingWithCopyWarning.
item_view = sorted.loc[is_item_page & not_canvas & from_work_page].copy()
item_view.insert(0, "action", "item_view")

In [None]:
# Image pageviews whose source contains "/concepts/".
is_image_pageview = (sorted["page.name"] == "image") & (sorted["type"] == "pageview")
# Missing values were replaced with "" when the data was loaded, so notnull()
# alone is always True; an empty string is what marks a missing image id.
has_image_id = sorted["page.query.id"].fillna("").ne("")
from_concept = sorted["source"].str.contains("/concepts/", na=False)
# .copy() so the later "cypher_code" assignment does not raise SettingWithCopyWarning.
image_view = sorted.loc[is_image_pageview & has_image_id & from_concept].copy()
image_view.insert(0, "action", "image_view")

In [None]:
# "Open image modal" events fired on concept pages (path starts with
# "/concepts/" and contains no "#").
on_concept_page = (sorted["page.name"] == "concept") & (sorted["check10"] == "/concepts/")
opened_modal = sorted["properties.event"] == "Open image modal"
no_fragment = ~sorted["page.path"].str.contains("#", na=False)
# .copy() so the later "cypher_code" assignment does not raise SettingWithCopyWarning.
image_modal = sorted.loc[on_concept_page & opened_modal & no_fragment].copy()
image_modal.insert(0, "action", "image_modal")

In [None]:
### Do we have fewer than 1,000 nodes?
# Total number of rows across the five action frames.
print(sum(len(frame) for frame in (concepts, works, item_view, image_view, image_modal)))

### Creating nodes for Neo4j

In [None]:
### CONCEPTS
# Node pattern per concept view: "(<conceptId>:<action>),"
concept_node_open = "(" + concepts["page.query.conceptId"]
concepts["cypher_code"] = concept_node_open + ":" + concepts["action"] + "),"
# How many distinct concept node patterns there are.
check = concepts["cypher_code"].drop_duplicates()
len(check)


In [None]:
### WORKS
# Node pattern per work view: "(<workId>:<action>),"
work_node_open = "(" + works["page.query.workId"]
works["cypher_code"] = work_node_open + ":" + works["action"] + "),"
# How many distinct work node patterns there are.
check = works["cypher_code"].drop_duplicates()
len(check)

In [None]:
### ITEM_VIEW
# Node pattern per item view: "(<page.path>:<action>)," with every "/" removed.
# First build the pattern from the raw path, then strip the slashes.
raw_item_pattern = "(" + item_view["page.path"] + ":" + item_view["action"] + "),"
item_view["cypher_code"] = raw_item_pattern
item_view["cypher_code"] = item_view["cypher_code"].str.replace(r"\/", "", regex=True)
# How many distinct item node patterns there are.
check = item_view["cypher_code"].drop_duplicates()
len(check)

In [None]:
### IMAGE MODAL
# Node pattern per modal open: "(M<conceptId>:<action>),". The "M" prefix
# keeps the variable distinct from the concept's own node.
modal_node_open = "(M" + image_modal["page.query.conceptId"]
image_modal["cypher_code"] = modal_node_open + ":" + image_modal["action"] + "),"
# How many distinct image-modal node patterns there are.
check = image_modal["cypher_code"].drop_duplicates()
len(check)

In [None]:
### IMAGE VIEW
# Node pattern per image view: "(<page.query.id>:<action>),"
image_node_open = "(" + image_view["page.query.id"]
image_view["cypher_code"] = image_node_open + ":" + image_view["action"] + "),"
# How many distinct image-view node patterns there are.
check = image_view["cypher_code"].drop_duplicates()
len(check)

### Creating relationships in Neo4j

In [None]:
# Relationship patterns of the form "(<from>)-[:clicked_on]->(<to>),", one per row.

# concept -> work: the last 8 characters of the work view's source are used as the concept variable.
work_targets = works["page.query.workId"]
concepts2works = "(" + works["sourcechop"] + ")-[:clicked_on]->(" + work_targets + "),"

# work -> item: the item variable is its path with every "/" removed.
item_targets = item_view["page.path"].str.replace(r"\/", "", regex=True)
works2item_view = "(" + item_view["sourcechop"] + ")-[:clicked_on]->(" + item_targets + "),"

# concept -> image modal: the modal variable is "M" + conceptId.
modal_concepts = image_modal["page.query.conceptId"]
concepts2image_modal = "(" + modal_concepts + ")-[:clicked_on]->(M" + modal_concepts + "),"

# image modal -> image view: the modal variable is rebuilt from the image view's source.
image_targets = image_view["page.query.id"]
image_modal2image_view = "(M" + image_view["sourcechop"] + ")-[:clicked_on]->(" + image_targets + "),"

In [None]:
# Node declarations. Each action frame has one row per event, so the same
# "(<id>:<label>)," pattern can occur many times; a Cypher CREATE clause does
# not accept the same variable being declared twice, so keep one of each.
node_patterns = pd.concat([
    concepts["cypher_code"],
    works["cypher_code"],
    item_view["cypher_code"],
    image_modal["cypher_code"],
    image_view["cypher_code"],
]).drop_duplicates()

# Relationship rows are left as they were: only the modal -> image view
# patterns are de-duplicated.
relationship_patterns = pd.concat([
    concepts2works,
    works2item_view,
    concepts2image_modal,
    image_modal2image_view.drop_duplicates(),
])

# Nodes first, then relationships, written as a single-column CSV.
cypher_code = pd.concat([node_patterns, relationship_patterns])
cypher_code.to_csv("cypher_code.csv", index=False)

### Download cypher_code.csv. Change cell A1 (the header) to "CREATE". Replace the trailing comma on the last row with ";".

### What do the users look like who didn't click any concepts links after viewing?

### Here are all the records for users who viewed a concept page that received no clicks (to a work or an image modal)

In [None]:
# Concept ids that led to a click: taken from the source of work views and
# from image-modal events.
click_sources = [works["sourcechop"], image_modal["page.query.conceptId"]]
concepts_clicked = pd.concat(click_sources).drop_duplicates()
concepts_clicked2 = concepts_clicked.tolist()

# Concept views whose concept id is not in that list.
was_clicked = concepts["page.query.conceptId"].isin(concepts_clicked2)
concepts_not_clicked = concepts.loc[~was_clicked].drop_duplicates()
concepts_not_clicked2 = concepts_not_clicked["page.query.conceptId"].tolist()

# Events on those concepts, then every event of the users behind them.
on_unclicked_concept = df2["page.query.conceptId"].isin(concepts_not_clicked2)
single_concept_users = df2.loc[on_unclicked_concept]
single_concept_ids = single_concept_users["anonymousId"].tolist()
single_concept_users2 = df2.loc[df2["anonymousId"].isin(single_concept_ids)]

### Identifying all actions including search and paging through 

In [None]:
# Search events: works / search / images pages that carry a query string.
on_search_page = single_concept_users2["page.name"].isin(["works", "search", "images"])
# Missing values were replaced with "" when the data was loaded, so notnull()
# alone is always True and would count every listing page as a search; an
# empty string is what marks "no query".
has_query = single_concept_users2["page.query.query"].fillna("").ne("")
search = single_concept_users2.loc[on_search_page & has_query]
search.insert(0, "action", "search")
search.head(1)

In [None]:
# Every concept page event of the selected users (rebinds `concepts`).
is_concept_event = single_concept_users2["page.name"] == "concept"
concepts = single_concept_users2.loc[is_concept_event]
concepts.insert(0, "action", "concepts")

In [None]:
# Every work page event of the selected users (rebinds `works`).
is_work_event = single_concept_users2["page.name"] == "work"
# Missing values were replaced with "" when the data was loaded, so notnull()
# alone is always True; an empty string is what marks a missing workId.
has_work_id = single_concept_users2["page.query.workId"].fillna("").ne("")
works = single_concept_users2.loc[is_work_event & has_work_id]
works.insert(0, "action", "works")

In [None]:
# Every item page event of the selected users except "download" events
# (rebinds `item_view`).
is_item_event = single_concept_users2["page.name"] == "item"
not_download = single_concept_users2["properties.event"] != "download"
item_view = single_concept_users2.loc[is_item_event & not_download]
item_view.insert(0, "action", "item_view")

In [None]:
# Every image pageview of the selected users (rebinds `image_view`).
is_image_pageview = (single_concept_users2["page.name"] == "image") & (
    single_concept_users2["type"] == "pageview")
# Missing values were replaced with "" when the data was loaded, so notnull()
# alone is always True; an empty string is what marks a missing image id.
has_image_id = single_concept_users2["page.query.id"].fillna("").ne("")
image_view = single_concept_users2.loc[is_image_pageview & has_image_id]
image_view.insert(0, "action", "image_view")

In [None]:
# Every "Open image modal" event of the selected users (rebinds `image_modal`).
opened_modal = single_concept_users2["properties.event"] == "Open image modal"
image_modal = single_concept_users2.loc[opened_modal]
image_modal.insert(0, "action", "image_modal")

In [None]:
# Stack the (anonymousId, action) pairs of every action frame, in this order.
# NOTE(review): this name shadows the builtin all(); it is kept because the
# cells below read it.
action_frames = (search, concepts, works, item_view, image_view, image_modal)
all = pd.concat([frame[["anonymousId", "action"]] for frame in action_frames])

In [None]:
# Constant 1 per event so that summing it per group gives the event count.
all = all.assign(count_actions=1)

In [None]:
# Number of events per user and action type; the result has a two-level
# column header ("count_actions", "sum").
per_user_action = all.groupby(["anonymousId", "action"])
all2 = per_user_action[["count_actions"]].agg(["sum"])

In [None]:
# Write the per-user action counts (index included) next to the notebook.
all2.to_csv(path_or_buf="all2.csv")