# MrPUP-3: There should be no duplicate (rfr_physn_npi, HCPCS, carr_clm_blg_npi_num) combination in MrPUP v2.

Description: check whether the table contains any duplicate (rfr_physn_npi, HCPCS, carr_clm_blg_npi_num) combination.

Starting Author: Amy Jin (amy@careset.com)

Date: April 30th, 2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connection to Parenthood Server

In [3]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
# os.getcwd() replaces the IPython-only `!pwd` shell escape so this cell is
# plain Python. The value is kept as a one-element list so that any code
# indexing `project_dir[0]` keeps working.
project_dir = [os.getcwd()]  # dir of current script/notebook file

# Read db.yaml through a context manager so the file handle is always closed.
with open(os.path.join(project_dir[0], "db.yaml")) as config_file:
    config = pureyaml.load(config_file.read())

# Argument dictionary for sshtunnel
# NOTE(review): binding the local end to 0.0.0.0 exposes the forwarded MySQL
# port on every network interface of this machine -- confirm that is intended.
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', 3333),
}

# Argument dictionary for mysql.connector
# 'port' must match the local bind port of the SSH tunnel above.
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     3333,
}

# Connect to Parenthood server (smoke test only).
# The tunnel is torn down when the `with` block exits, so the connection and
# cursor are closed explicitly here rather than left as dead globals.
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    try:
        cur = connection.cursor()
        print('MySQL server connected successfully!')
        cur.close()
    finally:
        connection.close()

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [4]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) db_name:                database name in server
# 2) table_name:             table name
# 3) rfr_physn_npi:          name of the referring physn npi column
# 4) HCPCS:                  name of the HCPCS code column
# 5) carr_clm_blg_npi_num:   name of the carr_clm_blg_npi_num column
# --------------------------------------- Outputs: --------------------------------------
# 1) Test result:            PASS/FAIL
# 2) If FAIL, the test will print out duplicate (rfr_physn_npi, HCPCS, carr_clm_blg_npi_num).


def _quote_identifier(name):
    """Return `name` as a backtick-quoted MySQL identifier (embedded backticks doubled)."""
    return '`{}`'.format(str(name).replace('`', '``'))


def mrpup_3(db_name, table_name, rfr_physn_npi, HCPCS, carr_clm_blg_npi_num):
    """Check a table for duplicate (HCPCS, rfr_physn_npi, carr_clm_blg_npi_num) combinations.

    Opens an SSH tunnel and a MySQL connection using the module-level
    ``ssh_config`` and ``mysql_config`` dictionaries, groups the table by the
    three given columns and reports every group that occurs more than once.

    Args:
        db_name: Name of the database that holds the table.
        table_name: Name of the table to check.
        rfr_physn_npi: Name of the referring physn npi column.
        HCPCS: Name of the HCPCS code column.
        carr_clm_blg_npi_num: Name of the carr_clm_blg_npi_num column.

    Returns:
        None. The result is printed: "Test result: PASS" when no combination is
        duplicated; otherwise "Test result: FAIL" followed by one line per
        duplicated combination, listed as HCPCS, rfr_physn_npi,
        carr_clm_blg_npi_num and the number of rows sharing it.

    Raises:
        Whatever sshtunnel / mysql.connector raise on tunnel, connection or
        query failure; the cursor and connection are closed before the
        exception propagates.
    """
    # Identifiers cannot be sent as bound query parameters, so they are
    # backtick-quoted instead of being interpolated raw into the SQL text.
    # One query serves both the PASS/FAIL decision and the listing:
    #   * only grouped columns and an aggregate are selected, so it also runs
    #     under ONLY_FULL_GROUP_BY (unlike `SELECT * ... GROUP BY`);
    #   * COUNT(*) counts rows, so groups whose first column is NULL are
    #     detected too (COUNT(col) skips NULL values).
    query = ('''
        SELECT {col1}, {col2}, {col3}, COUNT(*)
        FROM {db}.{t1}
        GROUP BY {col1}, {col2}, {col3}
        HAVING COUNT(*) > 1;
    '''.format(db=_quote_identifier(db_name),
               t1=_quote_identifier(table_name),
               col1=_quote_identifier(HCPCS),
               col2=_quote_identifier(rfr_physn_npi),
               col3=_quote_identifier(carr_clm_blg_npi_num)))

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                print ('Test file: {}.{}'.format(db_name, table_name))
                print ('\n')

                # MySQL query to get duplicates
                cur.execute(query)
                duplicates = cur.fetchall()
            finally:
                cur.close()
        finally:
            # Always release the connection, even if the query failed.
            connection.close()

    if not duplicates:
        print ("Test result: PASS")
        return

    print ("Test result: FAIL" + '\n')
    print ("The following ({}, {}, {}) pairs have duplicates:".format(HCPCS,rfr_physn_npi,carr_clm_blg_npi_num) + '\n')

    for row in duplicates:
        # Every value is followed by ", " and each row by a blank line, which
        # matches the report layout already recorded in this notebook.
        print (''.join('{}, '.format(value) for value in row) + '\n')

## Test Example

In [5]:
# Positional arguments map to (db_name, table_name, rfr_physn_npi, HCPCS,
# carr_clm_blg_npi_num): 'prf_physn_npi' lands in the HCPCS slot and
# 'hcpcs_cd' in the carr_clm_blg_npi_num slot. The check groups on all three
# columns together, so only the column order of the report is affected.
mrpup_3('_amy', 'test_mrpup_good', 'rfr_physn_npi', 'prf_physn_npi', 'hcpcs_cd')

Test file: _amy.test_mrpup_good


Test result: PASS


In [6]:
# 'npi' is passed for both rfr_physn_npi and carr_clm_blg_npi_num, so the
# grouping is effectively on (npi_dest, npi) and the npi value is printed
# twice in each reported row.
mrpup_3('_amy', 'test_data_bad1', 'npi', 'npi_dest', 'npi')

Test file: _amy.test_data_bad1


Test result: FAIL

The following (npi_dest, npi, npi) pairs have duplicates:

, , , 8, 

, -271287451, -271287451, 2, 

, 9662639330, 9662639330, 2, 

, hello, hello, 23, 

-4505704229, hello, hello, 2, 

-4645004173, hello, hello, 2, 

-4859573525, , , 2, 

-684527653, hello, hello, 2, 

-863535838, hello, hello, 2, 

1104882604, hello, hello, 2, 

1316918931, hello, hello, 2, 

1366634370, hello, hello, 2, 

1386859205, hello, hello, 2, 

1518058155, hello, hello, 2, 

1518067859, hello, hello, 2, 

1828263176, hello, hello, 2, 

2147483648, , , 10, 

2147483648, -2387881917, -2387881917, 2, 

2147483648, -251040449, -251040449, 2, 

2147483648, -2916655565, -2916655565, 2, 

2147483648, -704006346, -704006346, 2, 

2147483648, 1003089814, 1003089814, 2, 

2147483648, 1003090168, 1003090168, 2, 

2147483648, 1003096272, 1003096272, 2, 

2147483648, 1003108317, 1003108317, 2, 

2147483648, 1464319629, 1464319629, 2, 

2147483648, 4667382876, 4667382876