## A/B-testing

In [38]:
import pandas as pd
import sqlite3

## Connection to sqlite3

In [39]:
# Open the SQLite file under ../ex00/data; the query cells read through `conn`.
conn = sqlite3.connect(database="../ex00/data/checking-logs.sqlite")

In [40]:
# Inner query: earliest pageview per uid, restricted to uids present in `test`
# whose rows have a non-NULL first_commit_ts.
# Outer query: average of those first-view moments as Unix timestamps.
# The resulting scalar is reused further down as the before/after cutoff for
# the `control` table.
query = """
SELECT AVG(UNIXEPOCH(first_view)) as avg_first_view
FROM (
    SELECT test.uid, MIN(pviews.datetime) as first_view
    FROM test 
    JOIN pageviews pviews ON test.uid = pviews.uid 
    WHERE test.first_commit_ts IS NOT NULL 
    GROUP BY test.uid
)
"""
avg_view_df = pd.read_sql(sql=query, con=conn)
# An aggregate without GROUP BY yields a single row, so the column mean is
# simply that one value.
avg_first_view = avg_view_df["avg_first_view"].mean()
avg_first_view

np.float64(1587901354.1818182)

### Test table

In [41]:
# FirstViews: earliest pageview per uid.
# Main query: every `test` row except 'project1' is labelled 'before' or
# 'after' depending on whether its first commit precedes that user's first
# pageview; the (first commit - deadline) / 3600 delta is averaged per label
# (presumably hours — assumes deadlines.deadlines holds Unix seconds).
# The IN (...) filter keeps only uids that have at least one commit on each
# side of their first pageview.
# NOTE(review): the uid filter counts 'project1' rows while the outer query
# excludes them — confirm this asymmetry is intended.
# NOTE(review): UNIXEPOCH() returns an integer; if deadlines.deadlines is an
# integer too, "/ 3600" is integer division and truncates every delta to whole
# hours before averaging — TODO confirm.
query_test = """
WITH FirstViews AS (
    SELECT uid, MIN(datetime) AS first_view
    FROM pageviews
    GROUP BY uid
)

SELECT 
    CASE 
        WHEN UNIXEPOCH(test.first_commit_ts) < UNIXEPOCH(FirstViews.first_view) THEN 'before'
        ELSE 'after'
    END AS time,
    AVG((UNIXEPOCH(test.first_commit_ts) - deadlines.deadlines) / 3600) AS avg_diff
FROM test
JOIN deadlines ON deadlines.labs = test.labname
JOIN FirstViews ON test.uid = FirstViews.uid
WHERE test.labname != 'project1'
AND test.uid IN (         
    SELECT DISTINCT test.uid
    FROM test
    JOIN FirstViews ON test.uid = FirstViews.uid
    GROUP BY test.uid
    HAVING 
        COUNT(CASE WHEN UNIXEPOCH(test.first_commit_ts) < UNIXEPOCH(FirstViews.first_view) THEN 1 END) > 0
        AND COUNT(CASE WHEN UNIXEPOCH(test.first_commit_ts) >= UNIXEPOCH(FirstViews.first_view) THEN 1 END) > 0
)
GROUP BY time
"""
test_results = pd.read_sql(sql=query_test, con=conn)
test_results

Unnamed: 0,time,avg_diff
0,after,-93.85
1,before,-60.5625


### Control test

In [42]:
# Same before/after split as for the `test` table, but the `control` rows are
# compared against one shared cutoff: the `avg_first_view` value computed
# earlier, bound to each of the three `?` placeholders.
# Rows for 'project1' are excluded from the averages, and only uids with
# commits on both sides of the cutoff are kept.
# NOTE(review): the uid filter counts 'project1' rows while the outer query
# excludes them — confirm this asymmetry is intended.
# NOTE(review): if deadlines.deadlines is an integer, "/ 3600" is integer
# division and truncates every delta to whole hours — TODO confirm.
query_control = """
SELECT 
    CASE 
        WHEN UNIXEPOCH(control.first_commit_ts) < ? THEN 'before'
        ELSE 'after'
    END AS time,
    AVG((UNIXEPOCH(control.first_commit_ts) - deadlines.deadlines) / 3600) AS avg_diff
FROM control
JOIN deadlines ON deadlines.labs = control.labname
WHERE control.labname != 'project1'
AND control.uid IN (
    SELECT DISTINCT control.uid
    FROM control
    GROUP BY control.uid
    HAVING 
        COUNT(CASE WHEN UNIXEPOCH(control.first_commit_ts) < ? THEN 1 END) > 0
        AND COUNT(CASE WHEN UNIXEPOCH(control.first_commit_ts) >= ? THEN 1 END) > 0
)
GROUP BY time
"""

# One cutoff value repeated once per placeholder, in positional order.
control_results = pd.read_sql(
    sql=query_control,
    con=conn,
    params=(avg_first_view,) * 3,
)
control_results

Unnamed: 0,time,avg_diff
0,after,-110.457143
1,before,-130.952381


### Connection closed

In [43]:
# Release the SQLite handle; the remaining cell only uses the DataFrames
# (test_results, control_results) that are already in memory.
conn.close()

## Answer

In [44]:
# Pull the single avg_diff value for each period out of the two result frames.
# .iloc[0] raises IndexError if a period label is missing from a frame.
test_before, test_after = (
    test_results.loc[test_results["time"] == period, "avg_diff"].iloc[0]
    for period in ("before", "after")
)
control_before, control_after = (
    control_results.loc[control_results["time"] == period, "avg_diff"].iloc[0]
    for period in ("before", "after")
)

# Change in the average delta from the 'before' period to the 'after' period.
test_diff = test_after - test_before
control_diff = control_after - control_before

print(f"Test period difference: {test_diff:.2f} hour")
print(f"Control period difference: {control_diff:.2f} hour")

# Decision rule: the first message is printed only when the test delta moved
# downwards (negative change) and by more, in absolute terms, than the control
# delta moved; otherwise the second message is printed.
# The messages appear to be Uzbek for "the page has a positive effect on
# students' behaviour" / "the page has no noticeable effect" — TODO confirm.
print(
    "Gipoteza: sahifa talabalarning xatti-harakatlariga ijobiy ta'sir qiladi."
    if test_diff < 0 and abs(control_diff) < abs(test_diff)
    else "Gipoteza: sahifa talabalarning xatti-harakatlariga sezilarli ta'sir ko'rsatmaydi."
)

Test period difference: -33.29 hour
Control period difference: 20.50 hour
Gipoteza: sahifa talabalarning xatti-harakatlariga ijobiy ta'sir qiladi.
