# HOP-5: if same day transaction count = transaction count, average day wait = 0, std day wait = 0

Description: If the same-day transaction count equals the transaction count, then both the average day wait and the std day wait should be zero.

Starting Author: Amy Jin (amy@careset.com)

Date: July 20th, 2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connection to Parenthood Server

In [1]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
# os.getcwd() replaces the IPython-only `!pwd` shell escape, so this cell is
# also valid plain Python. project_dir stays a one-element list so that any
# existing `project_dir[0]` usage keeps working.
project_dir = [os.getcwd()]  # dir of current script/notebook file

# Read the credentials file and close the handle as soon as it is parsed.
with open(os.path.join(project_dir[0], "db.yaml")) as config_file:
    config = pureyaml.load(config_file.read())

# Local end of the SSH tunnel; the MySQL client must connect to the same port.
LOCAL_BIND_PORT = 3333

# Argument dictionary for sshtunnel
# NOTE(review): binding the local end to '0.0.0.0' presumably exposes the
# forwarded MySQL port on every network interface of this machine; consider
# '127.0.0.1' if only local access is needed -- confirm against
# config['mysql_host'] before changing.
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', LOCAL_BIND_PORT),
}

# Argument dictionary for mysql.connector
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     LOCAL_BIND_PORT,
}

# Connect to Parenthood server (connectivity check only: the tunnel is torn
# down when the `with` block exits, so the connection is closed here too
# instead of being left dangling).
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    try:
        cur = connection.cursor()
        cur.close()
        print('MySQL server connected successfully!')
    finally:
        connection.close()

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [2]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) db_name:                       database name in server
# 2) table_name:                    table name
# 3) same_day_transaction_count:    same day transaction count column
# 4) transaction_count:             transaction count column 
# 5) average_day_wait:              average day wait column 
# 6) std_day_wait:                  std day wait column 
# --------------------------------------- Outputs: --------------------------------------
# 1) Test result:            PASS/FAIL
# 2) If FAIL, the test will print out all the same_day_transaction_count, transaction_count, 
# average_day_wait, std_day_wait that fail the test.


def hop_5(db_name, table_name, same_day_transaction_count,
          transaction_count, average_day_wait, std_day_wait):
    """Run the HOP-5 check on one table and print a PASS/FAIL report.

    HOP-5: whenever the same-day transaction count equals the transaction
    count, both the average day wait and the std day wait must be 0.

    A fresh SSH tunnel and MySQL connection are opened for the call (using
    the module-level ``ssh_config`` and ``mysql_config``) and are always
    closed again, even if the query raises.

    Args:
        db_name: Name of the database that holds the table.
        table_name: Name of the table to check.
        same_day_transaction_count: Name of the same-day transaction count
            column.
        transaction_count: Name of the transaction count column.
        average_day_wait: Name of the average day wait column.
        std_day_wait: Name of the std day wait column.

    Returns:
        None. The result is printed: a single PASS line, or a FAIL line
        followed by the four column names and the four values of every
        offending row.

    NOTE: the database, table and column names are interpolated directly
    into the SQL text, so only pass trusted names.
    NOTE(review): a row whose wait column is NULL is not reported, because
    ``NULL <> 0`` is not true in SQL -- confirm whether NULL should count
    as a failure.
    """
    # One query fetches every offending row; an empty result means PASS, so
    # no separate "LIMIT 1" probe is needed. The shared `col1 = col2`
    # condition is factored out of the two OR-ed clauses.
    query = ('''
        SELECT {col1}, {col2}, {col3}, {col4}
        FROM {db}.{t1}
        WHERE {col1} = {col2} AND ({col3} <> 0 OR {col4} <> 0);
    '''.format(db=db_name, t1=table_name, col1=same_day_transaction_count,
               col2=transaction_count, col3=average_day_wait,
               col4=std_day_wait))

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                cur.execute(query)
                failing_rows = cur.fetchall()
            finally:
                cur.close()
        finally:
            connection.close()

    if not failing_rows:
        print("HOP-5 test result for {}.{} : PASS".format(db_name, table_name))
        return

    print("HOP-5 test result for {}.{} : FAIL".format(db_name, table_name) + '\n')
    print("The following rows fail the test:" + '\n')
    print('{}, {}, {}, {}'.format(same_day_transaction_count, transaction_count,
                                  average_day_wait, std_day_wait) + '\n')

    # Each value is followed by ", " and each row by a blank line, matching
    # the report layout this test has always produced.
    for row in failing_rows:
        print("".join(str(value) + ", " for value in row) + '\n')

## Test Example

In [3]:
# Run the HOP-5 check on the `test_hop_directed_good` table.
hop_5(
    db_name='_amy',
    table_name='test_hop_directed_good',
    same_day_transaction_count='same_day_transaction_count',
    transaction_count='transaction_count',
    average_day_wait='average_day_wait',
    std_day_wait='std_day_wait',
)

Count-8 test result for _amy.test_hop_directed_good : PASS


In [4]:
# Run the HOP-5 check on the `test_hop_directed_bad` table.
hop_5(
    db_name='_amy',
    table_name='test_hop_directed_bad',
    same_day_transaction_count='same_day_transaction_count',
    transaction_count='transaction_count',
    average_day_wait='average_day_wait',
    std_day_wait='std_day_wait',
)

Count-8 test result for _amy.test_hop_directed_bad : FAIL

The following rows fail the test:

same_day_transaction_count, transaction_count, average_day_wait, std_day_wait

218, 218, 20.11, 51.712, 

32, 32, 24.094, 25.656, 

46, 46, 7.826, 16.016, 

115, 115, 38.852, 65.679, 

21, 21, 37.762, 66.025, 

12, 12, 21.917, 23.4, 

13, 13, 82.923, 69.648, 

180, 180, 38.228, 62.013, 

12, 12, 53.667, 49.658, 

21, 21, 39.857, 61.7, 

39, 39, 73.385, 84.772, 

46, 46, 35.87, 64.345, 

0, 0, 37.333, 77.563, 

0, 0, 70.578, 70.345, 

0, 0, 89.137, 82.283, 

0, 0, 80.241, 78.921, 

0, 0, 62.786, 79.125, 

0, 0, 117.2, 103.962, 

0, 0, 44.467, 61.054, 

0, 0, 52.56, 50.961, 

0, 0, 90.647, 93.535, 

0, 0, 29.172, 46.094, 

0, 0, 20.365, 27.387, 

0, 0, 48.519, 44.526, 

0, 0, 82.656, 71.212, 

0, 0, 42.364, 37.033, 

0, 0, 66.565, 60.744, 

0, 0, 104.176, 82.594, 

0, 0, 53.125, 58.211, 

0, 0, 76.0, 73.723, 

0, 0, 54.182, 91.204, 

0, 0, 64.529, 63.058, 

0, 0, 71.922, 72.498, 

0, 0, 4.417, 5