# Compare-12: compare NPI pair (from_npi, to_npi) count in 'undirected Hop teaming' to that in 'directed Hop teaming' and check the overlap pair count.

Description: compare NPI pair (from_npi, to_npi) count in 'undirected Hop teaming' to that in 'directed Hop teaming' and check the overlap pair count.

Starting Author: Amy Jin (amy@careset.com)

Date: July 27th, 2018

https://docs.google.com/spreadsheets/d/1IYg01IpssJaWHo6KxO4_dSDgXtYNFy41S5cIHFLvlGQ/edit#gid=604789549

## Connection to Parenthood Server

In [1]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
project_dir = !pwd  # dir of current script/notebook file
config_file = open(project_dir[0] + "/db.yaml");
config = pureyaml.load(config_file.read());

# Argument dictionary for sshtunnel
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', 3333),
}

# Argument dictionary for mysql.connector
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     3333,
}

# Connect to Parenthood server
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    cur = connection.cursor()
    print('MySQL server connected successfully!')

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [2]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) undirected_db:                 Hop teaming database name with undirected table
# 2）undirected_table:              Hop teaming undirected table name
# 3) undirected_npi:                Hop teaming npi column in undirected table
# 4) undirected_npi_dest:           Hop teaming npi_dest column in undirected table

# 1) directed_db:                   Hop teaming database name with directed table
# 2）directed_table:                Hop teaming directed table name
# 3) directed_npi:                  Hop teaming npi column in directed table
# 4) directed_npi_dest:             Hop teaming npi_dest column in directed table
# --------------------------------------- Outputs: --------------------------------------
# Test result:  
#    - Count of distinct (npi 1,npi 2) pair in both files
#    - Count of distinct (npi,npi_dest) pair in hop teaming undirected table
#    - Count of distinct (from_npi,to_npi) pair in hop teaming directed table

def compare_12(undirected_db, undirected_table, undirected_npi, undirected_npi_dest,\
              directed_db, directed_table, directed_npi, directed_npi_dest):

    with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
        connection = mysql.connector.connect(**mysql_config)
        cur = connection.cursor()
        
        # MySQL query to find distinct count of NPI pair in both Hop undirected and directed
        query = ('''
            SELECT COUNT(*)
            FROM {db}.{t} AS table1
            JOIN {db2}.{t2} AS table2
            ON (table1.{npi} = table2.{npi2} AND table1.{npi_dest} = table2.{npi_dest2})
            OR (table1.{npi} = table2.{npi_dest2} AND table1.{npi_dest} = table2.{npi2});
        '''.format(db = undirected_db, t = undirected_table, \
                   npi = undirected_npi, npi_dest = undirected_npi_dest, \
                   db2 = directed_db, t2 = directed_table, \
                   npi2 = directed_npi, npi_dest2 = directed_npi_dest))

        cur.execute(query)
    
        print ('Count of distinct ({},{}) in both {}.{} and {}.{} is:'.format(undirected_npi, undirected_npi_dest, \
                                                                         undirected_db, undirected_table, \
                                                                         directed_db, directed_table))
        
        for row in cur.fetchall():
            for i in range(0,len(row)):
                print (str(row[i]))
         
        # MySQL query to find distinct count of NPI pair in Hop teaming undirected table
        query = ('''
                SELECT COUNT(*)
                FROM (
                       SELECT *
                       FROM {db}.{t1} AS T1
                       GROUP BY T1.{npi}, T1.{npi_dest}
                     ) AS T2;
        '''.format(db = undirected_db, t1 = undirected_table, npi = undirected_npi, npi_dest = undirected_npi_dest))

        cur.execute(query)
    
        print ('Count of distinct ({},{}) pair in {}.{} is:'.format(undirected_npi, undirected_npi_dest,\
                                                                    undirected_db, undirected_table))
        
        for row in cur.fetchall():
            for i in range(0,len(row)):
                print (str(row[i]))

        # MySQL query to find distinct count of NPI pair in Hop teaming directed table
        query = ('''
                SELECT COUNT(*)
                FROM (
                       SELECT *
                       FROM {db}.{t1} AS T1
                       GROUP BY T1.{npi}, T1.{npi_dest}
                     ) AS T2;
        '''.format(db = directed_db, t1 = directed_table, npi = directed_npi, npi_dest = directed_npi_dest))

        cur.execute(query)
    
        print ('Count of distinct ({},{}) pair in {}.{} is:'.format(directed_npi, directed_npi_dest,\
                                                                    directed_db, directed_table))
        
        for row in cur.fetchall():
            for i in range(0,len(row)):
                print (str(row[i]))
            
        cur.close()
        connection.close()

## Test Example

In [10]:
compare_12('_amy', 'test_data_good','icd10cm', \
          'icd10cm', \
         '_amy', 'test_data_bad1','icd_dgns_cd',\
         'icd_dgns_cd')

Count of distinct (icd10cm,icd10cm) in both _amy.test_data_good and _amy.test_data_bad1 is:
0
Count of distinct (icd10cm,icd10cm) pair in _amy.test_data_good is:
2000
Count of distinct (icd_dgns_cd,icd_dgns_cd) pair in _amy.test_data_bad1 is:
138


## Internal Data Example

SELECT COUNT(*)
FROM npi_team_rootgraph_2014.undirected AS table1
JOIN npi_team_rootgraph_2014.directed AS table2
ON (table1.npi = table2.from_npi
    AND table1.npi_dest = table2.to_npi); # 159132078

SELECT COUNT(*)
FROM npi_team_rootgraph_2014.undirected AS table1
JOIN npi_team_rootgraph_2014.directed AS table2
    ON (table1.npi = table2.to_npi
    AND table1.npi_dest = table2.from_npi); # 159132078
    
SELECT COUNT(*)
FROM npi_team_rootgraph_2014.undirected AS table1
JOIN npi_team_rootgraph_2014.directed AS table2
ON (table1.npi = table2.from_npi
    AND table1.npi_dest = table2.to_npi) 
    OR (table1.npi = table2.to_npi
    AND table1.npi_dest = table2.from_npi); # 208995654

In [4]:
compare_12('npi_hop_RQ14', 'undirected', \
           'npi', 'npi_dest',\
           'npi_hop_RQ14', 'directed', \
           'from_npi', 'to_npi')

Count of distinct (npi,npi_dest) in both npi_hop_RQ14.undirected and npi_hop_RQ14.directed is:
208995654
Count of distinct (npi,npi_dest) pair in npi_hop_RQ14.undirected is:
123034516
Count of distinct (from_npi,to_npi) pair in npi_hop_RQ14.directed is:
104497827
