In [None]:
import os

from weco_datascience.reporting import get_recent_data

In [None]:
# Load the working dataset through get_recent_data, requesting n=10000 records
# from the "metrics-conversion-prod" index. The process environment is passed
# as the config (presumably it carries the connection settings - verify
# against get_recent_data).
df = get_recent_data(
    config=os.environ,
    n=10000,
    index="metrics-conversion-prod",
)


Empty or null queries are almost always the second most popular search term. Are people trying to get access to filters by submitting nothing? How many times does this occur?

In [None]:
# A row counts as a search when it comes from the "images" or "works" page
# and has a non-null totalResults value.
on_search_page = df["page.name"].isin(["images", "works"])
has_result_count = df["properties.totalResults"].notna()
searches = df.loc[on_search_page & has_result_count]

# Searches for which no query value was recorded.
null_searches = searches.loc[searches["page.query.query"].isna()]

len(null_searches)

In [None]:
# Total number of rows in `searches` (the denominator for the null-query share).
searches.shape[0]

354 out of a total of 3,108 searches are empty or null queries. Why?

Testing reveals that to expose the filters, you need to enter at least a space. Are these users doing that?

In [None]:
# Paths recorded when the submitted query is a single space ("+" in the URL).
space_paths = ["/images?query=+", "/works?query=+"]
submitted_space = null_searches["page.path"].isin(space_paths)
space_searches = null_searches.loc[submitted_space]
len(space_searches)

Null queries can also be generated by moving between tabs. Is this what's happening?

In [None]:
# Null searches whose path is exactly a bare search page, with no query string.
bare_paths = ["/images", "/works"]
on_bare_path = null_searches["page.path"].isin(bare_paths)
tab_usage = null_searches.loc[on_bare_path]
len(tab_usage)

`page.query.query` is also missing on `images` and `works` pages (`page.name = images` or `page.name = works`) when users click on link buttons. Clicking a link button has the effect of filtering for the link value.

In [None]:
# Select null searches whose path contains "label" (link-button clicks).
# na=False makes a missing page.path count as "no match", so the mask is
# strictly boolean; without it a NaN in the mask makes the indexing raise.
# regex=False makes this a literal substring test.
is_link_click = null_searches["page.path"].str.contains(
    "label", regex=False, na=False
)
link_usage = null_searches[is_link_click]

# Columns to show when eyeballing the matched rows.
view = ["anonymousId", "page.query.query", "page.path", "page.name"]
link_usage[view].head()

How many link clicks are being included in null searches?

In [None]:
# Number of null searches that matched the link-click filter.
link_usage.shape[0]

So of 354 empty/null searches, 86 result from moving between tabs and 104 result from clicking on links. How have the other searches occurred?

In [None]:
# Drop the two categories already explained: bare search-page paths first...
hmmm = null_searches.loc[~null_searches["page.path"].isin(["/images", "/works"])]

# ...then paths containing "label" (link clicks). na=False keeps the mask
# strictly boolean so that `~` cannot fail on a missing page.path; such rows
# stay in the remainder. regex=False makes this a literal substring test.
is_link_click = hmmm["page.path"].str.contains("label", regex=False, na=False)
hmmm2 = hmmm[~is_link_click]

# Columns to show when eyeballing the remaining rows.
view = ["anonymousId", "page.query.query", "page.path", "page.name"]
hmmm2[view].head()

How many searches come from Wellcome Images?

In [None]:
# Rows whose path contains "wellcomeImagesUrl". na=False treats a missing
# page.path as "no match" so the boolean mask never contains NaN (which would
# make the indexing raise); regex=False makes this a literal substring test.
from_wellcome_images = hmmm2["page.path"].str.contains(
    "wellcomeImagesUrl", regex=False, na=False
)
hmmm3 = hmmm2[from_wellcome_images]
len(hmmm3)

And what about the rest of the null/empty searches?

In [None]:
# Everything left once the "wellcomeImagesUrl" paths are removed. na=False
# keeps the mask strictly boolean so that `~` cannot fail on a missing
# page.path; regex=False makes this a literal substring test.
from_wellcome_images = hmmm2["page.path"].str.contains(
    "wellcomeImagesUrl", regex=False, na=False
)
hmmm4 = hmmm2.loc[~from_wellcome_images]

# Include "source" here as well, to help attribute the remaining rows.
view = ["anonymousId", "page.query.query", "page.path", "page.name", "source"]
hmmm4[view]

So the 354 null or empty searches can be attributed as follows:
- 46% (162) searches coming from Wellcome Images which do not populate page.query.query
- 29% (104) link clicks
- 24% (86) tabbing between catalogue and image search
- 1 pagination
- 1 from a Google ad campaign promoting Collections