# Compare-4: patient count in longer timespan >= in shorter timespan

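Description: checks that, for every row matched on the join keys, the `cnt_bene_id` of the longer timespan is greater than or equal to the `cnt_bene_id` of the shorter timespan. The test passes when no matched row has a larger count in the shorter timespan.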

Starting Author: Amy Jin (amy@careset.com)

Date: May 31st, 2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connect to Parenthood Server

In [1]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
# os.getcwd() replaces the IPython-only `!pwd` shell escape. The value is kept
# as a one-element list so any existing `project_dir[0]` lookup keeps working.
project_dir = [os.getcwd()]  # current working directory of the kernel
# Read the credentials file and close it right away instead of leaking the handle.
with open(os.path.join(project_dir[0], "db.yaml")) as config_file:
    config = pureyaml.load(config_file.read())

# Local end of the SSH tunnel; shared by the tunnel and the MySQL client config
# so the two can never drift apart.
LOCAL_TUNNEL_PORT = 3333

# Argument dictionary for sshtunnel
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    # Bind to loopback only: '0.0.0.0' would expose the forwarded MySQL port
    # to every host that can reach this machine.
    'local_bind_address':  ('127.0.0.1', LOCAL_TUNNEL_PORT),
}

# Argument dictionary for mysql.connector
# NOTE(review): with the tunnel bound to loopback, config['mysql_host'] must
# resolve to 127.0.0.1/localhost -- confirm against db.yaml.
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     LOCAL_TUNNEL_PORT,
}

# Connect to Parenthood server (connectivity smoke test only)
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    try:
        cur = connection.cursor()
        cur.close()
        print('MySQL server connected successfully!')
    finally:
        # Close the connection before the tunnel goes away; a connection left
        # open here would be unusable once the `with` block exits anyway.
        connection.close()

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [2]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) db_name_short:                database name of short timespan table
# 2) table_name_short:             table name of short timespan table
# 3) col_name_short:               test column in short timespan table
# 4) key_short1:                   key 1 in short timespan table
# 5) key_short2:                   key 2 in short timespan table
# 6) key_short3:                   key 3 in short timespan table

# 1) db_name_long:                 database name of long timespan table
# 2) table_name_long:              table name of long timespan table
# 3) col_name_long:                test column in long timespan table
# 4) key_long1:                    key 1 in long timespan table
# 5) key_long2:                    key 2 in long timespan table
# 6) key_long3:                    key 3 in long timespan table

# Note: if there are fewer than 3 keys, fill the unused key slot(s) by repeating one of the keys that is used.
# --------------------------------------- Outputs: --------------------------------------
# 1) Test result:            PASS/FAIL
# 2) If FAIL, the test will print out the failed patient count in both files.


def compare_4(db_name_short, table_name_short, col_name_short,
              key_short1, key_short2, key_short3,
              db_name_long, table_name_long, col_name_long,
              key_long1, key_long2, key_long3):
    """Check that the long-timespan count is never below the short-timespan count.

    The short and long tables are inner-joined on three key columns and every
    joined row where the short table's test column is strictly greater than
    the long table's test column is reported as a failure.

    Args:
        db_name_short: database name of the short timespan table.
        table_name_short: table name of the short timespan table.
        col_name_short: test column in the short timespan table.
        key_short1, key_short2, key_short3: join keys in the short timespan
            table. With fewer than three real keys, repeat one of them.
        db_name_long: database name of the long timespan table.
        table_name_long: table name of the long timespan table.
        col_name_long: test column in the long timespan table.
        key_long1, key_long2, key_long3: join keys in the long timespan table,
            matched positionally with the short keys.

    Returns:
        None. Prints "Compare-4 test result : PASS" when no row fails;
        otherwise prints "Compare-4 test result : FAIL" followed by the
        (short, long) value pair of every failing row.

    Raises:
        Whatever sshtunnel / mysql.connector raise on connection or query
        errors; the cursor and connection are closed before propagating.
    """
    # NOTE: SQL identifiers cannot be bound as query parameters, so they are
    # interpolated verbatim. Only pass names that come from trusted code,
    # never from user input.
    query = '''
        SELECT table1.{col1} , table2.{col2}
        FROM {db1}.{t1} AS table1
        JOIN {db2}.{t2} AS table2
        ON table1.{key1_1} = table2.{key2_1} AND
            table1.{key1_2} = table2.{key2_2} AND
            table1.{key1_3} = table2.{key2_3}
        WHERE table1.{col1} > table2.{col2};
    '''.format(db1=db_name_short, t1=table_name_short, col1=col_name_short,
               key1_1=key_short1, key1_2=key_short2, key1_3=key_short3,
               db2=db_name_long, t2=table_name_long, col2=col_name_long,
               key2_1=key_long1, key2_2=key_long2, key2_3=key_long3)

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                # One query is enough: an empty result means PASS, a
                # non-empty result is already the list of failing rows.
                cur.execute(query)
                failed_rows = cur.fetchall()
            finally:
                cur.close()
        finally:
            # Always release the connection, even when the query raises.
            connection.close()

    if not failed_rows:
        print("Compare-4 test result : PASS")
        return

    print("Compare-4 test result : FAIL" + '\n')
    print("The following rows fail the test:" + '\n')
    print('SHORT', '{}'.format(col_name_short),
          ', LONG', '{}'.format(col_name_long))
    for row in failed_rows:
        # Same layout as before: "v1, v2, " followed by a blank line.
        print("".join(str(value) + ", " for value in row) + '\n')

## Test Example

In [3]:
# "good" table as the short timespan, "bad" table as the long timespan;
# the recorded run below reports PASS.
compare_4(
    db_name_short='_amy', table_name_short='test_mrpup_good', col_name_short='cnt_bene_id',
    key_short1='rfr_physn_npi', key_short2='hcpcs_cd', key_short3='prf_physn_npi',
    db_name_long='_amy', table_name_long='test_mrpup_bad', col_name_long='cnt_bene_id',
    key_long1='rfr_physn_npi', key_long2='hcpcs_cd', key_long3='prf_physn_npi',
)

Comapre-4 test result : PASS


In [4]:
# Roles swapped: "bad" table as the short timespan, "good" table as the long
# timespan; the recorded run below reports FAIL.
compare_4(
    db_name_short='_amy', table_name_short='test_mrpup_bad', col_name_short='cnt_bene_id',
    key_short1='rfr_physn_npi', key_short2='hcpcs_cd', key_short3='prf_physn_npi',
    db_name_long='_amy', table_name_long='test_mrpup_good', col_name_long='cnt_bene_id',
    key_long1='rfr_physn_npi', key_long2='hcpcs_cd', key_long3='prf_physn_npi',
)

Compare-4 test result : FAIL

The following rows fail the test:

SHORT cnt_bene_id , LONG cnt_bene_id
500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 72, 

500, 44, 

500, 47, 

500, 34, 

500, 68, 

500, 163, 

500, 72, 

500, 194, 

500, 14, 

500, 30, 

500, 11, 

500, 13, 

500, 22, 

500, 13, 

500, 11, 

500, 26, 

500, 13, 

500, 16, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 

500, 1, 



## Internal Data Test Example

In [None]:
# Internal data run: the RIFQ2016 table is passed as the short timespan and
# the 16_17_9 table as the long timespan.
# NOTE(review): key 1 is 'cnt_rfr_physn_npi' (a count-style column name), not
# 'rfr_physn_npi' as in the examples above -- confirm this is the intended
# join key.
compare_4(
    db_name_short='mrpup_carr',
    table_name_short='mrpup_three_reverse_prf_RIFQ2016',
    col_name_short='cnt_bene_id',
    key_short1='cnt_rfr_physn_npi', key_short2='hcpcs_cd', key_short3='prf_physn_npi',
    db_name_long='mrpup_carr',
    table_name_long='mrpup_three_reverse_prf_16_17_9',
    col_name_long='cnt_bene_id',
    key_long1='cnt_rfr_physn_npi', key_long2='hcpcs_cd', key_long3='prf_physn_npi',
)

Compare-4 test result : FAIL

The following rows fail the test:

SHORT cnt_bene_id , LONG cnt_bene_id
63, 61, 

46, 39, 

90, 74, 

51, 45, 

26, 20, 

19, 12, 

13, 12, 

34, 18, 

34, 31, 

28, 19, 

33, 25, 

28, 20, 

102, 94, 

33, 18, 

40, 23, 

64, 44, 

28, 14, 

29, 15, 

33, 23, 

23, 15, 

32, 27, 

38, 25, 

103, 88, 

33, 20, 

41, 28, 

105, 88, 

31, 19, 

138, 83, 

66, 23, 

78, 54, 

84, 55, 

78, 50, 

28, 16, 

107, 67, 

30, 23, 

16, 14, 

20, 14, 

202, 196, 

67, 64, 

76, 69, 

62, 18, 

59, 20, 

46, 40, 

57, 47, 

38, 31, 

30, 27, 

28, 18, 

70, 60, 

25, 24, 

28, 22, 

55, 26, 

24, 21, 

57, 22, 

51, 22, 

51, 22, 

14, 11, 

59, 52, 

52, 19, 

88, 32, 

32, 21, 

35, 11, 

32, 11, 

16, 13, 

40, 22, 

50, 23, 

26, 18, 

17, 14, 

31, 23, 

14, 12, 

13, 11, 

12, 11, 

124, 35, 

499, 458, 

455, 401, 

161, 141, 

53, 43, 

54, 22, 

44, 30, 

32, 24, 

60, 59, 

57, 40, 

26, 20, 

86, 69, 

31, 25, 

32, 27, 

32, 24, 

29, 13, 

51, 25, 

251,