# Count-2: NPI count <= patient count

Description: test whether every provider (NPI) count is less than or equal to its corresponding patient count.

Starting Author: Amy Jin (amy@careset.com)

Date: 07/23/2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connection to Parenthood Server

In [1]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
# Resolve the working directory with os.getcwd() instead of the IPython-only
# `!pwd` shell escape, so this cell also runs as plain Python. It is kept as a
# one-element list so that existing `project_dir[0]` lookups keep working.
project_dir = [os.getcwd()]

# Load credentials from db.yaml in the working directory; the context manager
# closes the file handle even if parsing fails.
with open(os.path.join(project_dir[0], "db.yaml")) as config_file:
    config = pureyaml.load(config_file.read())

# Argument dictionary for sshtunnel
# NOTE(review): binding to 0.0.0.0 makes the forwarded port reachable on every
# local interface; 127.0.0.1 may be sufficient -- confirm before changing.
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', 3333),
}

# Argument dictionary for mysql.connector
# The port is the tunnel's local bind port (3333), not the remote MySQL port.
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     3333,
}

# Connect to Parenthood server (smoke test of the tunnel and the credentials)
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    try:
        cur = connection.cursor()
        print('MySQL server connected successfully!')
        cur.close()
    finally:
        # The connection goes through the tunnel, so release it before the
        # tunnel itself is torn down at the end of the `with` block.
        connection.close()

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [2]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) db_name:                database name in server
# 2) table_name:             table name
# 3) npi_cnt:                NPI count column. If there is no NPI count column,
# pass 1 as the npi_cnt input.
# 4) bene_cnt:               patient count column
# --------------------------------------- Outputs: --------------------------------------
# 1) Test result:            PASS/FAIL
# 2) If FAIL, the test will print out:
#      - the count of rows that fail the test and 
#      - the total count of rows.


def count_2(db_name, table_name, npi_cnt, bene_cnt):
    """Check that no row has an NPI count greater than its patient count.

    Prints PASS when no row of ``db_name.table_name`` satisfies
    ``npi_cnt > bene_cnt``. Otherwise prints FAIL, followed by the number of
    rows that fail the test and the total number of rows in the table.

    Args:
        db_name: Database name on the server.
        table_name: Name of the table to check.
        npi_cnt: Name of the NPI count column, or the integer ``1`` when
            there is no such column (the test then passes without opening
            a connection).
        bene_cnt: Name of the patient count column.

    Returns:
        None. The result is reported on standard output.

    NOTE: the database, table and column names are interpolated directly
    into the SQL text (identifiers cannot be bound as query parameters), so
    only pass trusted names.
    """
    # Nothing to compare when there is no NPI count column.
    if npi_cnt == 1:
        print("Count 2 result: PASS")
        return

    names = {'db': db_name, 't1': table_name, 'col1': npi_cnt, 'col2': bene_cnt}

    # Cheap existence probe: one offending row is enough to fail the test.
    probe_query = '''
        SELECT *
        FROM {db}.{t1}
        WHERE {col1} > {col2}
        LIMIT 1;
    '''.format(**names)

    failed_count_query = '''
        SELECT COUNT(*)
        FROM {db}.{t1}
        WHERE {col1} > {col2};
    '''.format(**names)

    total_count_query = '''
        SELECT COUNT(*)
        FROM {db}.{t1};
    '''.format(db=db_name, t1=table_name)

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                cur.execute(probe_query)
                if not cur.fetchall():
                    print("Count-2 result for {}.{} : PASS".format(db_name, table_name))
                    return

                print("Count-2 result for {}.{} : FAIL".format(db_name, table_name) + '\n')
                _print_query_values(
                    cur, "The count of rows that fail the test is:", failed_count_query)
                _print_query_values(
                    cur, "The total count of rows is:", total_count_query)
            finally:
                # Release the cursor even when a query raises.
                cur.close()
        finally:
            # Release the connection before the tunnel is torn down.
            connection.close()


def _print_query_values(cur, heading, query):
    """Run ``query`` on ``cur`` and print ``heading`` followed by every value returned."""
    cur.execute(query)
    print(heading + '\n')
    for row in cur.fetchall():
        for value in row:
            print(str(value))
        print('\n')

## Test Example

In [3]:
# Example 1: 1 is passed as npi_cnt (the "no NPI count column" case), so
# count_2 prints PASS without running a query.
count_2('_amy', 'test_data_good', 1, 'cnt_bene_id')

Count 2 result: PASS


In [4]:
# Example 2: compare cnt_pnpi against cnt_bene_id in _amy.test_data_bad1.
# The captured output below reports FAIL with the failing and total row counts.
count_2('_amy', 'test_data_bad1', 'cnt_pnpi','cnt_bene_id')

Count-2 result for _amy.test_data_bad1 : FAIL

The count of rows that fail the test is:

1698


The total count of rows is:

1999


