# ICD10CM-3: first char of ICD is a letter (except for 'U')

QA step description: Check whether the first character of each ICD code is valid.
The valid first characters of an ICD code are: [a-t, v-z, A-T, V-Z]

Starting Author: Amy Jin (amy@careset.com)

Date: April 16th, 2018

https://docs.google.com/spreadsheets/d/1sltXNV5CDfkClTs23rCgOSImqeZDIjJJoTZQwtDvZ18/edit#gid=0

## Connect to Parenthood Server

In [1]:
# Packages import
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import operator
import mysql.connector
import sshtunnel
import pureyaml

# Handle path
project_dir = !pwd  # dir of current script/notebook file
config_file = open(project_dir[0] + "/db.yaml");
config = pureyaml.load(config_file.read());

# Argument dictionary for sshtunnel
ssh_config = {
    'ssh_address_or_host': ('parenthood.set.care', 22),
    'ssh_username':        config['ssh_username'],
    'ssh_password':        config['ssh_password'],
    'remote_bind_address': ('127.0.0.1', 3306),
    'local_bind_address':  ('0.0.0.0', 3333),
}

# Argument dictionary for mysql.connector
mysql_config = {
    'user':     config['mysql_user'],
    'password': config['mysql_passwd'],
    'host':     config['mysql_host'],
    'database': 'patch',
    'port':     3333,
}

# Connect to Parenthood server
with sshtunnel.SSHTunnelForwarder(**ssh_config) as tunnel:
    print('SSH tunneling successful on port: {}'.format(tunnel.local_bind_port))
    connection = mysql.connector.connect(**mysql_config)
    cur = connection.cursor()
    print('MySQL server connected successfully!')

SSH tunneling successful on port: 3333
MySQL server connected successfully!


## Test Function

In [2]:
# --------------------------------------- Inputs: ---------------------------------------
# 1) db_name:                database name in server
# 2) table_name:             table name
# 3) col_name:               column to test
# --------------------------------------- Outputs: --------------------------------------
# 1) Test result:            PASS/FAIL
# 2) If FAIL, the test will print out all distinct bad ICD codes.


def _icd10cm_3_bad_first_char_clause(col_name):
    """Return the SQL condition selecting values with an invalid first char.

    The condition is an ``AND`` chain of ``NOT LIKE '<letter>%'`` tests, one
    for every accepted first letter (a-t, v-z, A-T, V-Z).
    """
    lowercase = 'abcdefghijklmnopqrstvwxyz'  # 'u' is deliberately absent
    # Both cases are listed so the check does not rely on the column's
    # collation being case-insensitive.
    accepted = lowercase + lowercase.upper()
    return ' AND '.join(
        "{col} NOT LIKE '{ch}%'".format(col=col_name, ch=ch)
        for ch in accepted
    )


def icd10cm_3(db_name, table_name, col_name):
    """Run QA check ICD10CM-3 and print the result.

    Opens an SSH tunnel and a MySQL connection using the module-level
    ``ssh_config`` and ``mysql_config`` dictionaries, then checks that every
    value in ``db_name.table_name.col_name`` starts with a letter other
    than 'U'/'u'.

    Args:
        db_name: database (schema) name on the server.
        table_name: table to test.
        col_name: column holding the ICD codes.

    Prints:
        "Test result: PASS" when no offending row exists; otherwise
        "Test result: FAIL" followed by every distinct offending value.

    NOTE(review): the names are interpolated into the SQL text unquoted, so
    they must be trusted identifiers. Also, in SQL ``NOT LIKE`` evaluates to
    NULL for NULL values, so NULL codes are not reported - confirm that this
    is intended.
    """
    bad_first_char = _icd10cm_3_bad_first_char_clause(col_name)

    with sshtunnel.SSHTunnelForwarder(**ssh_config):
        connection = mysql.connector.connect(**mysql_config)
        try:
            cur = connection.cursor()
            try:
                print('Test file: {}.{}'.format(db_name, table_name))
                print('\n')

                # Cheap existence probe: one offending row is enough to fail.
                probe_query = '''
                    SELECT *
                    FROM {db}.{t1}
                    WHERE {where}
                    LIMIT 1;
                '''.format(db=db_name, t1=table_name, where=bad_first_char)
                cur.execute(probe_query)
                # fetchall() drains the result set so the cursor can be reused.
                offending = cur.fetchall()

                if not offending:
                    print("Test result: PASS")
                else:
                    print("Test result: FAIL" + '\n')
                    print("The following {} fail the test:".format(col_name) + '\n')

                    # MySQL query to get the distinct bad ICD codes
                    detail_query = '''
                        SELECT DISTINCT {col1}
                        FROM {db}.{t1}
                        WHERE {where};
                    '''.format(db=db_name, t1=table_name, col1=col_name,
                               where=bad_first_char)
                    cur.execute(detail_query)

                    for row in cur.fetchall():
                        for value in row:
                            print(str(value), end=", ")
                        print('\n')
            finally:
                # Release the cursor even when a query raises.
                cur.close()
        finally:
            # Close the connection before the tunnel is torn down.
            connection.close()

## Test Example

In [3]:
icd10cm_3('_amy', 'test_data_good','icd10cm')

Test file: _amy.test_data_good


Test result: PASS


In [4]:
icd10cm_3('_amy', 'test_data_bad1','icd_dgns_cd')

Test file: _amy.test_data_bad1


Test result: FAIL

The following icd_dgns_cd fail the test:

U, 

U0, 

, 

11111111, 

