# Compare-9: If test Compare-3 fails, check whether the same NPI has similar claim counts for consecutive timespans

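Description: If test Compare-3 fails, check whether the same NPI has similar claim counts for consecutive timespans. The test joins the two tables on four key columns and prints the total of the test column from each table over the matched rows, so the two totals can be compared.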

Starting Author: Amy Jin (amy@careset.com)

Date: July 27th, 2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connect to Parenthood Server

In [2]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
# os.getcwd() replaces the IPython-only `!pwd` shell escape so this cell also
# runs as plain Python. The value stays list-shaped so `project_dir[0]` keeps
# working for any code that already indexes it.
project_dir = [os.getcwd()]  # current working directory of the notebook/script

# Read the credentials file and close it as soon as it has been parsed.
with open(os.path.join(project_dir[0], "db.yaml")) as config_file:
    config = pureyaml.load(config_file.read())

# Argument dictionary for sshtunnel
# NOTE(review): binding the local end to 0.0.0.0 exposes the forwarded MySQL
# port on every network interface of this machine; consider '127.0.0.1' if
# config['mysql_host'] points at the loopback address -- confirm before changing.
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', 3333),
}

# Argument dictionary for mysql.connector
# 'port' must match the local end of the SSH tunnel configured above.
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     3333,
}

# Connect to Parenthood server
# This is only a connectivity check: the tunnel is torn down when the `with`
# block exits, so the cursor and connection are closed here rather than left
# dangling on a dead tunnel.
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    try:
        cur = connection.cursor()
        print('MySQL server connected successfully!')
        cur.close()
    finally:
        connection.close()

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [3]:
def compare_9(db_name1, table_name1, col_name1,
              key1_1, key1_2, key1_3, key1_4,
              db_name2, table_name2, col_name2,
              key2_1, key2_2, key2_3, key2_4):
    """Print the total of a test column in two tables over their matched rows.

    The two tables are inner-joined on four key-column pairs
    (``key1_n = key2_n``) and the test column of each table is summed over
    the joined rows. Both totals are printed so they can be compared by eye.
    If the keys are not unique in a table, a row contributes once per match.

    Inputs:
        db_name1:    database name of table 1
        table_name1: table 1 name
        col_name1:   test column in table 1
        key1_1:      key 1 of table 1
        key1_2:      key 2 of table 1
        key1_3:      key 3 of table 1
        key1_4:      key 4 of table 1
        db_name2:    database name of table 2
        table_name2: table 2 name
        col_name2:   test column in table 2
        key2_1:      key 1 of table 2
        key2_2:      key 2 of table 2
        key2_3:      key 3 of table 2
        key2_4:      key 4 of table 2

    Outputs (printed, nothing is returned):
        - total of ``col_name1`` in table 1 over the matched rows
        - total of ``col_name2`` in table 2 over the matched rows
        A total prints as ``None`` when no rows match (SQL SUM over no rows).

    Relies on the notebook-level ``ssh_config`` and ``mysql_config``
    dictionaries to open the SSH tunnel and the MySQL connection.

    NOTE: database, table and column names are interpolated directly into
    the SQL text (identifiers cannot be bound as query parameters), so they
    must come from a trusted source.
    """
    # One join computes both totals; the join condition is the same for the
    # two sums, so there is no need to run it twice.
    query = '''
        SELECT SUM(table1.{col1}), SUM(table2.{col2})
        FROM {db1}.{t1} AS table1
        JOIN {db2}.{t2} AS table2
        ON table1.{key1_1} = table2.{key2_1}
            AND table1.{key1_2} = table2.{key2_2}
            AND table1.{key1_3} = table2.{key2_3}
            AND table1.{key1_4} = table2.{key2_4};
    '''.format(
        db1=db_name1, t1=table_name1, col1=col_name1,
        key1_1=key1_1, key1_2=key1_2, key1_3=key1_3, key1_4=key1_4,
        db2=db_name2, t2=table_name2, col2=col_name2,
        key2_1=key2_1, key2_2=key2_2, key2_3=key2_3, key2_4=key2_4,
    )

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                cur.execute(query)
                # An aggregate without GROUP BY yields a single row.
                rows = cur.fetchall()
            finally:
                # Release the cursor even if the query fails.
                cur.close()
        finally:
            # Always close the connection before the tunnel is torn down.
            connection.close()

    for total1, total2 in rows:
        print('Total {} in {}.{} of NPIs appearing in both files is:'.format(col_name1, db_name1, table_name1))
        print(str(total1))
        print('Total {} in {}.{} of NPIs appearing in both files is:'.format(col_name2, db_name2, table_name2))
        print(str(total2))

## Test Example

In [4]:
# Compare the claim-count totals of the known-good and known-bad test tables,
# matching rows on (npi, npi_dest, setting, hcpcs_cd).
compare_9(
    db_name1='_amy', table_name1='test_data_good', col_name1='cnt_clm_id',
    key1_1='npi', key1_2='npi_dest', key1_3='setting', key1_4='hcpcs_cd',
    db_name2='_amy', table_name2='test_data_bad1', col_name2='cnt_clm_id',
    key2_1='npi', key2_2='npi_dest', key2_3='setting', key2_4='hcpcs_cd',
)

Total cnt_clm_id in _amy.test_data_good of NPIs appearing in both files is:
None
Total cnt_clm_id in _amy.test_data_bad1 of NPIs appearing in both files is:
None


## Internal Data Example

In [None]:
# Compare the claim-count totals of the RQ17 and RIFQ2016 tables,
# matching rows on (npi, npi_class, icd_prcdr_cd, setting).
compare_9(
    db_name1='npi_inst_icdproc', table_name1='npi_inst_icdproc_RQ17', col_name1='cnt_clm_id',
    key1_1='npi', key1_2='npi_class', key1_3='icd_prcdr_cd', key1_4='setting',
    db_name2='npi_inst_icdproc', table_name2='npi_inst_icdproc_RIFQ2016', col_name2='cnt_clm_id',
    key2_1='npi', key2_2='npi_class', key2_3='icd_prcdr_cd', key2_4='setting',
)