# Hop-2: in the undirected version of hop teaming, the volume (patient_count, claim_count) of (A,B) and (B,A) should be the higher of the two directed volumes

Description: In the undirected version of hop teaming, the pairs (A,B) and (B,A) should each carry the higher of the two volumes (patient_count, claim_count) that the directed version records for the two directions A→B and B→A.

Starting Author: Amy Jin (amy@careset.com)

Date: July 20th, 2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connection to Parenthood Server

In [1]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Locate db.yaml in the kernel's working directory.
# os.getcwd() replaces the IPython-only `!pwd` shell capture so this cell is plain,
# portable Python. The value is kept as a one-element list because that is the
# shape `!pwd` produced, so any `project_dir[0]` usage keeps working.
project_dir = [os.getcwd()]
# Read the credentials file inside a `with` block so the handle is always closed.
with open(os.path.join(project_dir[0], "db.yaml")) as config_file:
    config = pureyaml.load(config_file.read())

# Argument dictionary for sshtunnel
# NOTE(review): binding the local end to 0.0.0.0 exposes the forwarded MySQL port
# on every network interface of this machine; ('127.0.0.1', 3333) would restrict it
# to local processes -- confirm config['mysql_host'] points at localhost before changing.
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', 3333),
}

# Argument dictionary for mysql.connector
# The port must match the local end of the SSH tunnel configured above.
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     3333,
}

# Connectivity smoke test: open the tunnel, connect, then release everything.
# The connection cannot outlive the tunnel, so it is closed explicitly here
# instead of being left dangling after the `with` block exits.
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    try:
        cur = connection.cursor()
        print('MySQL server connected successfully!')
        cur.close()
    finally:
        connection.close()

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [5]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) db_directed:                       database name of directed table
# 2) table_directed:                    table name of directed table
# 3) from_npi_directed:                 from npi column in directed table 
# 4) to_npi_directed:                   to npi column in directed table 
# 5) patient_count_directed:            patient count in directed table
# 6) transaction_count_directed:        transaction/claim count in directed table

# 7) db_undirected:                     database name of undirected table
# 8) table_undirected:                  table name of undirected table
# 9) npi_undirected:                    from npi column in undirected table 
# 10) npi_dest_undirected:              to npi column in undirected table 
# 11) patient_count_undirected:         patient count in undirected table
# 12) transaction_count_undirected:     transaction/claim count in undirected table
# --------------------------------------- Outputs: --------------------------------------
# 1) Test result:                       PASS/FAIL
# 2) If FAIL, the test will print out:
#       - total count of (npi, npi_dest) pairs in the undirected table that fail the test, and
#       - total count of (npi, npi_dest) pairs present in both the directed and undirected tables.


def _hop_2_count_sql(names, pick_t2, npi_col, npi_dest_col, only_mismatches):
    """Build one of the Hop-2 ``SELECT COUNT(*)`` statements.

    The statement self-joins the directed table on reversed endpoints
    (t1.from = t2.to AND t1.to = t2.from), keeps the larger transaction count
    and the larger patient count of the two directions, and joins that result
    to the undirected table.

    Args:
        names: dict of database/table/column identifiers substituted into the SQL.
        pick_t2: SQL boolean expression; when true the t2 endpoints are used as
            (from_npi_big, to_npi_big), otherwise the t1 endpoints.
        npi_col: undirected-table column compared with from_npi_big.
        npi_dest_col: undirected-table column compared with to_npi_big.
        only_mismatches: when True, count only joined rows whose undirected
            patient or transaction count differs from the larger directed value.

    Returns:
        The SQL text as a string.
    """
    where_clause = ''
    if only_mismatches:
        where_clause = (
            'WHERE t4.patient_cnt_big <> t5.{patient_count_undirected} '
            'OR t4.tran_cnt_big <> t5.{transaction_count_undirected}'
        ).format(**names)

    template = '''
SELECT COUNT(*)
FROM (
       SELECT
         IF({pick_t2}, t2_from_npi, t1_from_npi)                               AS from_npi_big,
         IF({pick_t2}, t2_to_npi, t1_to_npi)                                   AS to_npi_big,
         IF(t1_tran_cnt < t2_tran_cnt, t2_tran_cnt, t1_tran_cnt)               AS tran_cnt_big,
         IF(t1_patient_cnt < t2_patient_cnt, t2_patient_cnt, t1_patient_cnt)   AS patient_cnt_big
       FROM
         (SELECT
            t1.{from_npi}          AS t1_from_npi,
            t1.{to_npi}            AS t1_to_npi,
            t1.{transaction_count} AS t1_tran_cnt,
            t1.{patient_count}     AS t1_patient_cnt,
            t2.{from_npi}          AS t2_from_npi,
            t2.{to_npi}            AS t2_to_npi,
            t2.{transaction_count} AS t2_tran_cnt,
            t2.{patient_count}     AS t2_patient_cnt
          FROM {db_directed}.{t_directed} AS t1
            JOIN {db_directed}.{t_directed} AS t2
              ON t1.{from_npi} = t2.{to_npi} AND t1.{to_npi} = t2.{from_npi}) AS t3) AS t4
  JOIN {db_undirected}.{t_undirected} AS t5
    ON t4.from_npi_big = t5.{npi_col} AND t4.to_npi_big = t5.{npi_dest_col}
{where_clause};
'''
    # Single formatting pass: substituted values are never re-interpreted as templates.
    return template.format(pick_t2=pick_t2, npi_col=npi_col, npi_dest_col=npi_dest_col,
                           where_clause=where_clause, **names)


def _hop_2_scalar(cur, sql):
    """Execute ``sql`` on ``cur`` and return its single COUNT(*) value as an int."""
    cur.execute(sql)
    # fetchall() drains the result set so the cursor can be reused right away.
    rows = cur.fetchall()
    if not rows:
        raise RuntimeError('Hop-2 count query returned no rows')
    return int(rows[0][0])


def hop_2(db_directed, table_directed, from_npi_directed, to_npi_directed, \
          patient_count_directed, transaction_count_directed,  \
          db_undirected, table_undirected, npi_undirected, npi_dest_undirected, \
          patient_count_undirected, transaction_count_undirected):
    """Run the Hop-2 test and print PASS/FAIL.

    For every reciprocal pair in the directed table, the larger transaction
    count and the larger patient count of the two directions are compared with
    the counts stored for the matching pair in the undirected table. The test
    passes when no joined row differs.

    Opens an SSH tunnel and a MySQL connection from the module-level
    ``ssh_config`` and ``mysql_config`` dictionaries, and always closes the
    cursor and connection, even when a query raises.

    Args:
        db_directed: database name of the directed table.
        table_directed: table name of the directed table.
        from_npi_directed: "from npi" column in the directed table.
        to_npi_directed: "to npi" column in the directed table.
        patient_count_directed: patient count column in the directed table.
        transaction_count_directed: transaction/claim count column in the directed table.
        db_undirected: database name of the undirected table.
        table_undirected: table name of the undirected table.
        npi_undirected: first npi column in the undirected table.
        npi_dest_undirected: second npi column in the undirected table.
        patient_count_undirected: patient count column in the undirected table.
        transaction_count_undirected: transaction/claim count column in the undirected table.

    Returns:
        None. The result is printed; on FAIL the number of mismatching joined
        rows and the number of joined rows are printed as well.

    Note:
        Identifiers are interpolated into the SQL text (identifiers cannot be
        bound as query parameters), so pass trusted names only.
    """
    names = dict(
        db_directed=db_directed, t_directed=table_directed,
        from_npi=from_npi_directed, to_npi=to_npi_directed,
        transaction_count=transaction_count_directed,
        patient_count=patient_count_directed,
        db_undirected=db_undirected, t_undirected=table_undirected,
        patient_count_undirected=patient_count_undirected,
        transaction_count_undirected=transaction_count_undirected,
    )

    tran_bigger = 't1_tran_cnt < t2_tran_cnt'
    # NOTE(review): the first mismatch query picks the pair orientation only when
    # BOTH counts are larger in t2, while the other two queries look at the
    # transaction count alone. Kept as in the original; confirm which rule is intended.
    both_bigger = 't1_tran_cnt < t2_tran_cnt AND t1_patient_cnt < t2_patient_cnt'

    # Mismatches where the undirected row stores the pair as (from_big, to_big).
    mismatch_same_sql = _hop_2_count_sql(
        names, both_bigger, npi_undirected, npi_dest_undirected, True)
    # Mismatches where the undirected row stores the pair the other way round.
    mismatch_flipped_sql = _hop_2_count_sql(
        names, tran_bigger, npi_dest_undirected, npi_undirected, True)
    # All joined rows, without the mismatch filter (reported on FAIL).
    joined_total_sql = _hop_2_count_sql(
        names, tran_bigger, npi_undirected, npi_dest_undirected, False)

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                # The self-join lists a reciprocal pair once per direction, so
                # these counts are joined rows rather than distinct pairs.
                fail_num = (_hop_2_scalar(cur, mismatch_same_sql)
                            + _hop_2_scalar(cur, mismatch_flipped_sql))

                if fail_num == 0:
                    print ("Hop-2 test result: PASS" + '\n')
                else:
                    print ("Hop-2 test result: FAIL" + '\n')
                    print ("The total count of (npi, npi_dest) pair in undirected table fail the test is:" + '\n')
                    print (fail_num)

                    joined_total = _hop_2_scalar(cur, joined_total_sql)
                    print ("The total count of (npi, npi_dest) pair in both directed and undirected table is:" + '\n')
                    print (str(joined_total))
            finally:
                cur.close()
        finally:
            connection.close()

## Test Example

In [6]:
# Example run on the "good" fixture tables (recorded output below: PASS).
hop_2(
    db_directed='_amy',
    table_directed='test_hop_directed_good',
    from_npi_directed='from_npi',
    to_npi_directed='to_npi',
    patient_count_directed='patient_count',
    transaction_count_directed='transaction_count',
    db_undirected='_amy',
    table_undirected='test_hop_undirected_good',
    npi_undirected='npi',
    npi_dest_undirected='npi_dest',
    patient_count_undirected='patient_count',
    transaction_count_undirected='transaction_count',
)

Hop-2 test result: PASS



In [7]:
# Example run on the "bad" fixture tables (recorded output below: FAIL).
hop_2(
    db_directed='_amy',
    table_directed='test_hop_directed_bad',
    from_npi_directed='from_npi',
    to_npi_directed='to_npi',
    patient_count_directed='patient_count',
    transaction_count_directed='transaction_count',
    db_undirected='_amy',
    table_undirected='test_hop_undirected_bad',
    npi_undirected='npi',
    npi_dest_undirected='npi_dest',
    patient_count_undirected='patient_count',
    transaction_count_undirected='transaction_count',
)

Hop-2 test result: FAIL

The total count of (npi, npi_dest) pair in undirected table fail the test is:

168
The total count of (npi, npi_dest) pair in both directed and undirected table is:

542
