# Find Zeros on the Left

## Importing Libraries

In [1]:
from numpy.testing import verbose
import pandas as pd
import numpy as np
import scipy as sp
from scipy import stats
from connecting_db import connection
from query_functions import *
import re

## Functions to remove zeros

In [2]:
def is_number(serial: str) -> bool:
    """
    Tell whether a serial is made up exclusively of decimal digits.

    Returns True only for a non-empty string in which every character is a
    decimal digit. The empty string, and any string holding a letter, sign,
    whitespace or punctuation character, yields False.
    """
    # str.isdecimal() is already False for "" and accepts exactly the
    # characters a regex digit class would, so no pattern (and no backslash
    # escape inside a non-raw string literal) is needed.
    return serial.isdecimal()

def convert_numbers(serial: str):
    """
    Turn an all-digit serial into an int, which drops any leading zeros.

    Serials that is_number() rejects are handed back untouched, so the
    result is either an int or the original string.
    """
    # Guard clause: anything that is not purely numeric passes through as-is.
    if not is_number(serial):
        return serial
    return int(serial)

## Nlyte pre-processing

### Nlyte Reading Data

In [3]:
# read_query_file presumably returns the SQL text stored in the file -- confirm
# against query_functions.
nlyte_query = read_query_file('./query_files/query_maximo_nlyte.sql')
# Run the query on the shared connection and load the result as a DataFrame.
nlyte_assets = pd.read_sql_query(sql=nlyte_query, con=connection)


### Getting No Zeros

Remove the leading zeros from every purely numeric serial number.

In [4]:
# Work on the textual form of every serial so the helpers always get a str.
nlyte_assets['SerialNumber'] = nlyte_assets['SerialNumber'].apply(str)
# Flag the all-digit serials, then store the zero-stripped value (an int for
# numeric serials, the unchanged string otherwise) used later as the join key.
nlyte_assets['is_number'] = nlyte_assets['SerialNumber'].apply(is_number)
nlyte_assets['no_left_zeros'] = nlyte_assets['SerialNumber'].apply(convert_numbers)

## Pre-processing Maximo

### Reading Data

In [5]:
# Load the Maximo export from the "Sheet1" worksheet of the workbook.
maximo = pd.read_excel(
    io='./MAXIMO_SHARED_20SET2021.xlsx',
    sheet_name="Sheet1",
)

In [6]:
# Maximo columns retained for the comparison; the order here is the column
# order of the trimmed frame.
columns_to_keep = [
    # asset identification
    'ASSET_NUMBER', 'CUSTOMER', 'ASSET_STATUS', 'MODEL',
    'REFERENCE_SERIAL_NUMBER',
    # location
    'CITY', 'ROOM', 'RACK',
    'KVA',
    # HW_* fields
    'HW_ELIGIBLE_BY_NLYTE', 'HW_NLYTE_HISTORY', 'HW_NLYTE_LASTSCAN',
    'HW_NLYTE_UPDATE', 'HW_MATERIAL_NAME',
]

In [7]:
# Keep every row but only the columns named in columns_to_keep, in that order.
maximo = maximo.loc[:, columns_to_keep]

### Getting No Zeros

Remove the leading zeros from every purely numeric serial number.

In [8]:
# Replace every missing value with an empty string so that the later str()
# conversion of the serial column does not produce the text "nan".
maximo = maximo.fillna('')

In [9]:
# Store the reference serial as text so the helper functions always get a str.
maximo['REFERENCE_SERIAL_NUMBER'] = maximo['REFERENCE_SERIAL_NUMBER'].apply(str)

In [10]:
# Zero-stripped serial (an int for all-digit serials, otherwise the unchanged
# string), followed by the flag that marks all-digit serials.
maximo['no_left_zeros'] = maximo['REFERENCE_SERIAL_NUMBER'].apply(convert_numbers)
maximo['is_number'] = maximo['REFERENCE_SERIAL_NUMBER'].apply(is_number)

## Finding matches on numerical serials

### Merging both tables, using nlyte as reference

In [11]:
# Inner-join the Nlyte rows whose Description is "Active" to Maximo on the
# zero-stripped serial, then keep only the columns needed to compare the raw
# serials. is_number exists in both frames, so pandas' default suffixes apply:
# is_number_x comes from Nlyte (left) and is_number_y from Maximo (right).
merge = (
    nlyte_assets
    .query('Description == "Active"')
    .merge(
        maximo,
        how='inner',
        left_on='no_left_zeros',
        right_on='no_left_zeros',
    )
)[[
    'BusinessGroupName',
    'CUSTOMER',
    'SerialNumber',
    'no_left_zeros',
    'REFERENCE_SERIAL_NUMBER',
    'is_number_x',
    'is_number_y',
]]


### Conditions to look at

1. The serial must be recognised as a number in both tables.
2. We keep only the entries whose serial numbers differ when compared as strings, which means that one of them has leading zeros.

In [12]:
# True when the serial is all digits on the Nlyte side and on the Maximo side.
merge['is_number_both'] = merge['is_number_x'] & merge['is_number_y']

# True when the two raw serial strings are not identical.
merge['is_different_serial'] = merge['SerialNumber'].ne(merge['REFERENCE_SERIAL_NUMBER'])

In [14]:
# Show the rows that are numeric on both sides yet differ as text, leaving out
# the first two columns of the merged frame.
(
    merge
    .query('is_number_both == True and is_different_serial == True')
    .iloc[:, 2:]
)

Unnamed: 0,SerialNumber,no_left_zeros,REFERENCE_SERIAL_NUMBER,is_number_x,is_number_y,is_number_both,is_different_serial
1416,1801011076,1801011076,1801011076,True,True,True,True
3401,27054,27054,27054,True,True,True,True
4041,2745645,2745645,2745645,True,True,True,True
5030,13201023685,13201023685,13201023685,True,True,True,True
5384,274052013000135,274052013000135,274052013000135,True,True,True,True
5682,1846989,1846989,1846989,True,True,True,True


In [15]:
# Write the rows that are numeric on both sides yet differ as text to an Excel
# file, all columns included and without the index.
(
    merge
    .query('is_number_both == True and is_different_serial == True')
    .to_excel('left_zeros.xlsx', index=False)
)

In [16]:
# (rows, columns) of the full merged frame, before the leading-zero filter.
merge.shape

(5686, 9)