# Online Evaluation

In [1]:
import pandas as pd
from tahoe_api.api import TahoeQuery, import_data_td, get_latest_table_generic, table_create, table_delete, check_table_exists



## V2

In [1]:
since_dt = '2023-01-01'
end_dt = '2023-09-01'

# V2 model has user_id=61b2599df8ce712b2305d746 which means records with this user_id indicates the auto-approve from the model.
# As a comparison, the prediction of `uncertain` will always reviewed by other user_id.
# Therefore, filtering the user_id can give us the positive or negative predictions.

appropriate_statement = '='
inappropriate_statement = '!='

basic_sql = """
    WITH tagging AS (
    SELECT job_id,
    product_id,
    is_adult,
    inappropriate_reason,
    subcategory,
    dt,
    hr
    FROM sweeper.true_tags
    WHERE dt >= '{since_dt}' and dt <= '{end_dt}'
    and user_id{condition}'61b2599df8ce712b2305d746'
    and action = 120 -- V2_INAPPROPRIATE_TAG_COMPLETED
    ),
    audit AS (
    SELECT tagging_job_id,
    product_id,
    is_adult,
    inappropriate_reason,
    subcategory,
    dt,
    hr
    FROM sweeper.inappropriate_audit_events
    WHERE dt >= '{since_dt}' and dt <= '{end_dt}'
    AND action = 4 -- audit job completed
    )

    SELECT tagging.job_id, tagging.inappropriate_reason as tagging_inappropriate_reason, audit.inappropriate_reason as audit_inappropriate_reason, tagging.dt

    FROM tagging
    LEFT JOIN audit ON tagging.job_id = audit.tagging_job_id
"""

appropriate_sql = basic_sql.format(since_dt=since_dt, end_dt=end_dt, condition=appropriate_statement)
inappropriate_sql = basic_sql.format(since_dt=since_dt, end_dt=end_dt, condition=inappropriate_statement)

In [7]:
rows = TahoeQuery(appropriate_sql, db_type='presto').run_query()
# df = pd.DataFrame(rows)



In [8]:
df = pd.DataFrame(rows)
df.shape

(34266791, 4)

In [48]:
df = pd.read_csv('inappropriate.csv')

In [10]:
df

Unnamed: 0,job_id,tagging_inappropriate_reason,audit_inappropriate_reason,dt
0,64d46767a4f16e7efec76ad4,,,2023-08-10
1,64d468812cbb48e64f28a6be,,,2023-08-10
2,64d468e288ae1e62b9e9bd7f,,,2023-08-10
3,64d466a04252ba792acdb26d,,,2023-08-10
4,64d4692ff72de532f530a8f8,,,2023-08-10
...,...,...,...,...
34266786,64416174a38ede187f5833d4,,,2023-04-20
34266787,6441616978d4827b63215c2a,,,2023-04-20
34266788,644168d4bccd95942ddcc10d,,,2023-04-20
34266789,64416f1d711203914a5e4c21,,,2023-04-20


In [26]:
import numpy as np

pd.isna(df['audit_inappropriate_reason'][0])

True

In [49]:
df = df.fillna('nan')

In [50]:
# predict = ['nan' for v in df['tagging_inappropriate_reason'] if v is None]
# gt = ['nan' for v in df['audit_inappropriate_reason'] if pd.isna(v)]

# predict.extend(df['tagging_inappropriate_reason'].tolist())
predict.extend(['inappropriate' if val != 'nan' else val for val in df['tagging_inappropriate_reason'].tolist()])
gt.extend(['inappropriate' if val != 'nan' else val for val in df['audit_inappropriate_reason'].tolist()])

In [2]:
def load_data_from_file(csv_file: str):
    df = pd.read_csv(csv_file)
    df = df.fillna('nan')

    predict_array = ['inappropriate' if val != 'nan' else val for val in df['tagging_inappropriate_reason'].tolist()]
    gt_array = ['inappropriate' if val != 'nan' else val for val in df['audit_inappropriate_reason'].tolist()]

    return predict_array, gt_array

In [3]:
predict_array = []
gt_array = []

In [4]:
for file_name in ['appropriate.csv', 'inappropriate.csv']:
    pred, gt = load_data_from_file(file_name)
    predict_array.extend(pred)
    gt_array.extend(gt)

In [6]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [8]:
precision_score(gt_array, predict_array, pos_label='inappropriate'), recall_score(gt_array, predict_array, pos_label='inappropriate')

ValueError: Found input variables with inconsistent numbers of samples: [73033121, 107299912]

In [46]:
set(gt)

{'inappropriate', 'nan'}