# Project Leaderboard and Annotation Reminder prototyping
Leaderboard goal: add friendly competition to the platform to incentivise participation.  

Reminder goal: a tool that helps users stay competitive by pushing reminders to them, rather than relying on them to pull their status from the site.

In [1]:
import pandas as pd
import logging
from app.data_loader import FileDataLoader, DBDataLoader
import instance.config as config
from smtplib import SMTP
from email.message import EmailMessage


# jup specific
from IPython.display import display, HTML
# config notebook


"""Build a logger"""
# logging.getLogger returns the same logger object on every call, so a
# notebook cell that is executed more than once must not add a second
# handler -- otherwise every message is emitted once per execution.
logger = logging.getLogger(__name__)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setFormatter(formatter)
if not logger.handlers:
    logger.addHandler(ch)
# NOTE(review): records also propagate to the root logger. If something else
# has attached a handler to the root logger, each message is still printed
# twice -- consider `logger.propagate = False` if that is the case here.


# Build the datasource selected in the instance config.
if config.DATASOURCE_TYPE == 'file':
    pv_dl = FileDataLoader(config.DATASOURCE_LOCATION, logger)
elif config.DATASOURCE_TYPE == 'db':
    pv_dl = DBDataLoader(config.VAULT_TOKEN,
                         config.VAULT_SERVER,
                         config.DATASOURCE_LOCATION,
                         config.DB_SCHEMA,
                         logger)
else:
    # Fail here with a clear message instead of a NameError on `pv_dl`
    # in some later cell.
    raise ValueError(
        f"Unsupported DATASOURCE_TYPE {config.DATASOURCE_TYPE!r}; "
        "expected 'file' or 'db'"
    )


!!!scheduler already started, DO NOTHING


In [2]:
# Preview the login URL that is embedded in the reminder e-mail.
config.HOST_FQDN + config.BLUEPRINT_URL_PREFIX

'http://localhost:5001/'

# Email Reminder Prototyping
Re-use the same code that builds the table on the pieval index page, except drop the result into an email instead of placing it on a webpage!

In [3]:
# get user data - this will limit to people who have been configured to receive alerts
# NOTE(review): any such filtering happens inside getUserData, which is not
# shown here -- confirm against the data loader.
user_details = pv_dl.getUserData()
# Bare expression so the notebook displays the returned records.
user_details

2020-05-01 13:57:50,461 - DEBUG - Starting new HTTPS connection (1): vault-ri.ucdmc.ucdavis.edu:8200
2020-05-01 13:57:50,699 - DEBUG - https://vault-ri.ucdmc.ucdavis.edu:8200 "GET /v1/cdi3/db/cdi3sql01/dev HTTP/1.1" 200 369
2020-05-01 13:57:50,701 - INFO - Creating MSSQL Database Engine
2020-05-01 13:57:50,701 - INFO - Creating MSSQL Database Engine


creating sqlalchemy engine


[{'user_name': 'awriedl',
  'print_name': 'Bill',
  'email': 'awriedl@ucdavis.edu'},
 {'user_name': 'jmcawood',
  'print_name': 'Joseph',
  'email': 'awriedl@ucdavis.edu'},
 {'user_name': 'mrenquis',
  'print_name': 'Matt',
  'email': 'awriedl@ucdavis.edu'},
 {'user_name': 'cyvhuynh', 'print_name': 'Cy', 'email': 'awriedl@ucdavis.edu'},
 {'user_name': 'jdcobabe',
  'print_name': 'Jared',
  'email': 'awriedl@ucdavis.edu'}]

In [4]:
# Settings for the reminder run.
dryrun = True                    # True: print the message instead of sending it
from_email = 'test@test.com'     # value used for the From header
days_till_nag = 3                # days of inactivity before a reminder is due

In [None]:
# Build one reminder e-mail per user, listing every project the user has not
# finished and has not annotated for more than `days_till_nag` days.
# With `dryrun` set, the message is printed instead of being sent.

# get project data (all projects)
data = pv_dl.getProjectData(return_as_dataframe=True)

# The number of examples per project does not depend on the user, so compute
# it once rather than on every loop iteration.
proj_example_counts = (data.groupby(['project_name'])
                           .size()
                           .to_frame()
                           .rename(columns={0: 'num_examples'})
                           .reset_index())

# `pd.datetime` has been removed from pandas; pd.Timestamp.now() replaces it.
now = pd.Timestamp.now()

for one_user in user_details:
    user_name = one_user.get('user_name')
    print_name = one_user.get('print_name')
    email = one_user.get('email')

    # get data for this user
    try:
        pieval_projects = pv_dl.getProjects(user_name=user_name, return_as_dataframe=True)
        prev_annots_for_user = pv_dl.getPriorAnnotations(user_name=user_name, return_as_dataframe=True)

        # group and join
        user_proj_counts = (prev_annots_for_user.groupby(['project_name'])
                                .agg(
                                    num_annotated=('example_id', 'count'),
                                    last_annot_time=('response_time', 'max')
                                )
                                .reset_index())
        # to_datetime is a no-op for datetime columns and makes the
        # subtraction work when the timestamps were loaded as strings.
        user_proj_counts['days_since_last'] = (
            now - pd.to_datetime(user_proj_counts['last_annot_time'])
        ).dt.days

        proj_status = pd.merge(proj_example_counts,
                               user_proj_counts,
                               on='project_name',
                               how='left')
        proj_status['pct_complete'] = (
            (proj_status['num_annotated'] / proj_status['num_examples']) * 100
        ).round()
        # Projects the user never touched are NaN after the left merge.
        # Only the numeric columns are filled; the timestamp column is left alone.
        # NOTE(review): days_since_last becomes 0 for never-started projects,
        # so those are never nagged -- confirm this is the intended policy.
        numeric_cols = ['num_annotated', 'pct_complete', 'days_since_last']
        proj_status[numeric_cols] = proj_status[numeric_cols].fillna(0)

        # 'days_since_last' must be carried through this merge: the filter
        # below reads it, and dropping it raised KeyError for every user.
        pieval_projects = pieval_projects.merge(
            proj_status.filter(['project_name', 'pct_complete', 'days_since_last']),
            on='project_name', how='left')
        is_incomplete = pieval_projects['pct_complete'] < 100
        is_stale = pieval_projects['days_since_last'] > days_till_nag
        incomplete_projects = (pieval_projects.loc[is_incomplete & is_stale]
                                              .to_dict(orient='records'))
    except KeyError as ke:
        # Presumably raised when the loader returns a frame without the
        # expected columns (e.g. no annotations yet) -- verify against the loader.
        logger.error("There is not data for this user!")
        logger.error(ke)
    else:
        logger.info(f"Current Username is {user_name}, print name is {print_name} and email is {email}")
        if incomplete_projects:
            # build a status_string with one line per incomplete project
            status_string = ''.join(
                f"You are {ip.get('pct_complete')} percent "
                f"complete on the {ip.get('project_name')} project\n"
                for ip in incomplete_projects
            )

            # combine that into the message
            message = (
                f"Hi {print_name},\nThis is Pieval!  I'm reaching out because "
                f"you have {len(incomplete_projects)} incomplete annotation projects "
                f"and I would like to see you stay on top of the leaderboard!\n{status_string}"
                f"Please login here to finish up: {config.HOST_FQDN+config.BLUEPRINT_URL_PREFIX}\n"
            )

            logger.info("===== SENDING EMAIL ==========")
            msg = EmailMessage()
            msg.set_content(message)
            msg['Subject'] = 'Pieval Annotation Reminder'
            msg['From'] = from_email
            msg['To'] = email

            if dryrun:
                logger.info("DRY RUN!!")
                print(message)
            else:
                # NOTE(review): port 587 normally expects STARTTLS (and often
                # authentication) before sending -- confirm what this relay requires.
                try:
                    with SMTP("smtp.ucdavis.edu", port=587) as smtp:
                        smtp.send_message(msg)
                        logger.info(f'Sent reminder email to: {email}')
                except Exception:
                    # Best effort per user: keep going, but record the
                    # traceback so the failure reason is not lost.
                    logger.exception(f'Could not send email to {email}.')

    print()
    
    
    

In [None]:
# Fetch the prior annotations of a single user as a DataFrame, to debug the
# per-user aggregation step by step in the cells below.
prev_annots_for_user = pv_dl.getPriorAnnotations(user_name='awriedl', return_as_dataframe=True)

In [None]:
prev_annots_for_user

In [None]:
# Per project: how many examples this user annotated and when the most
# recent annotation was made.
user_proj_counts = (prev_annots_for_user.groupby(['project_name'])
                                   .agg(
                                       num_annotated=('example_id','count'),
                                       last_annot_time=('response_time','max')
                                   )
                                   .reset_index())
# `pd.datetime` has been removed from pandas; pd.Timestamp.now() replaces it.
user_proj_counts['days_since_last'] = (pd.Timestamp.now() - user_proj_counts['last_annot_time']).dt.days
user_proj_counts

In [None]:
# `pd.datetime` has been removed from pandas; pd.Timestamp.now() returns the
# current local time as a (naive) Timestamp, which subclasses datetime.
pd.Timestamp.now()

### Project Leaderboard prototyping
Added as an extra table on the already existing project page.
This code is just to test the grouping logic.

In [23]:
# Display the projects as a DataFrame (no user_name is passed here).
pv_dl.getProjects(return_as_dataframe=True)

2020-05-01 14:11:19,355 - DEBUG - DB Engine Check Successful
2020-05-01 14:11:19,355 - DEBUG - DB Engine Check Successful


Unnamed: 0,project_name,project_description,project_mode
0,kappa_lambda_demo,DEMO PROJECT: Determining if monotypic/clonal ...,binary
1,cncr_hist_mc_demo,DEMO PROJECT: Classifying cancer histology fro...,multiclass
2,movie_reviews_demo,DEMO PROJECT: Classifying sentiment from movie...,multiclass
3,aml_bone_marrow_results,test project description,binary


In [29]:
# Prior annotations for one demo project; this is the input to the
# leaderboard grouping tested below.
prev_proj_annots_for_user_df = pv_dl.getPriorAnnotations(
    project_name='movie_reviews_demo',
    return_as_dataframe=True,
)
prev_proj_annots_for_user_df

2020-05-01 14:15:09,082 - DEBUG - DB Engine Check Successful
2020-05-01 14:15:09,082 - DEBUG - DB Engine Check Successful


Unnamed: 0,response_time,project_name,user_name,user_ip,example_id,response,context_viewed
2,2020-04-30 15:57:37,movie_reviews_demo,jmcawood,127.0.0.1,16.0,agree,no
3,2020-04-30 15:57:25,movie_reviews_demo,mrenquis,127.0.0.1,4.0,agree,no
4,2020-04-30 15:57:37,movie_reviews_demo,mrenquis,127.0.0.1,0.0,neutral,no
5,2020-04-30 15:57:46,movie_reviews_demo,mrenquis,127.0.0.1,16.0,agree,no
6,2020-05-01 13:36:31,movie_reviews_demo,awriedl,127.0.0.1,16.0,agree,no
8,2020-05-01 11:33:33,movie_reviews_demo,jdcobabe,127.0.0.1,16.0,neutral,no


In [32]:
# Rank users by number of annotations in the project, highest first.
# mergesort is stable, so users with equal counts keep the (sorted) order
# produced by groupby instead of an arbitrary one.
project_leaderboard = (prev_proj_annots_for_user_df.groupby(['user_name']).size()
                                                           .to_frame()
                                                           .rename(columns={0:'annotation_count'})
                                                           .sort_values(['annotation_count'],
                                                                        ascending=False,
                                                                        kind='mergesort')
                                                           .reset_index(drop=False))
# add medals: top three rows get gold/silver/bronze, everyone else the sad icon.
# Building the whole column at once also works for an empty leaderboard,
# where the 'medal' column was previously never created and the lookup
# raised KeyError.
# NOTE(review): medals go by row position, so users tied on annotation_count
# can still receive different medals -- confirm that is acceptable.
podium_medals = ['images/gold_small.png',
                 'images/silver_small.png',
                 'images/bronze_small.png']
num_users = project_leaderboard.shape[0]
consolation = ['images/sad_small.png'] * max(num_users - len(podium_medals), 0)
project_leaderboard['medal'] = (podium_medals + consolation)[:num_users]

project_leaderboard = project_leaderboard.to_dict(orient='records')
project_leaderboard

[{'user_name': 'mrenquis',
  'annotation_count': 3,
  'medal': 'images/gold_small.png'},
 {'user_name': 'awriedl',
  'annotation_count': 1,
  'medal': 'images/silver_small.png'},
 {'user_name': 'jdcobabe',
  'annotation_count': 1,
  'medal': 'images/bronze_small.png'},
 {'user_name': 'jmcawood',
  'annotation_count': 1,
  'medal': 'images/sad_small.png'}]

In [18]:
# Scratch check of the leaderboard's (rows, columns).
# NOTE(review): this only works while project_leaderboard is still a
# DataFrame; after the cell that calls .to_dict(orient='records') it is a
# list and has no .shape -- this cell appears to have been run earlier.
project_leaderboard.shape

(2, 2)

In [19]:
# Sanity check: an empty DataFrame reports zero rows, so the
# `shape[0] >= n` medal guards are safe for an empty leaderboard.
pd.DataFrame().shape[0]

0